LLVM 9.0.1
ARMISelLowering.cpp
//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "ARMISelLowering.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMCallingConv.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "arm-isel"

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
STATISTIC(NumConstpoolPromoted,
          "Number of constants with their storage promoted into constant pools");

static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
  cl::desc("Enable / disable ARM interworking (for debugging only)"),
  cl::init(true));

static cl::opt<bool> EnableConstpoolPromotion(
    "arm-promote-constant", cl::Hidden,
    cl::desc("Enable / disable promotion of unnamed_addr constants into "
             "constant pools"),
    cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
static cl::opt<unsigned> ConstpoolPromotionMaxSize(
    "arm-promote-constant-max-size", cl::Hidden,
    cl::desc("Maximum size of constant to promote into a constant pool"),
    cl::init(64));
static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
    "arm-promote-constant-max-total", cl::Hidden,
    cl::desc("Maximum size of ALL constants to promote into a constant pool"),
    cl::init(128));

// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
  ARM::R0, ARM::R1, ARM::R2, ARM::R3
};

void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
                                       MVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT, Promote);
    AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);

    setOperationAction(ISD::STORE, VT, Promote);
    AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
  }

  MVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::f64)
    setOperationAction(ISD::SETCC, VT, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
  if (ElemTy == MVT::i32) {
    // ...
  } else {
    // ...
  }
  // ...
  if (VT.isInteger()) {
    // ...
  }

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT, Promote);
    AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
    setOperationAction(ISD::OR, VT, Promote);
    AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
    setOperationAction(ISD::XOR, VT, Promote);
    AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT, Expand);
  setOperationAction(ISD::UDIV, VT, Expand);
  setOperationAction(ISD::FDIV, VT, Expand);
  setOperationAction(ISD::SREM, VT, Expand);
  setOperationAction(ISD::UREM, VT, Expand);
  setOperationAction(ISD::FREM, VT, Expand);

  if (!VT.isFloatingPoint() &&
      VT != MVT::v2i64 && VT != MVT::v1i64)
    for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
      setOperationAction(Opcode, VT, Legal);
}

void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPRRegClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPairRegClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

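// setAllExpand marks every operation on VT as Expand; it is used for types
// that are only supported at the level of register moves, loads and stores.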
void ARMTargetLowering::setAllExpand(MVT VT) {
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, VT, Expand);

  // We support these really simple operations even on types where all
  // the actual arithmetic has to be broken down into simpler
  // operations or turned into library calls.
  // ...
}

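// Apply one legalization action to all three extending-load flavours
// (any-, zero- and sign-extend) for the given value/memory type pair.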
void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
                                       LegalizeAction Action) {
  setLoadExtAction(ISD::EXTLOAD, From, To, Action);
  setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
  setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
}

void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
  const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };

  for (auto VT : IntTypes) {
    addRegisterClass(VT, &ARM::QPRRegClass);
    // ...

    // No native support for these.
    // ...

    if (!HasMVEFP) {
      // ...
    }
  }

  const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
  for (auto VT : FloatTypes) {
    addRegisterClass(VT, &ARM::QPRRegClass);
    if (!HasMVEFP)
      setAllExpand(VT);

    // These are legal or custom whether or not we have MVE.fp.
    // ...

    if (HasMVEFP) {
      // ...

      // No native support for these.
      // ...
    }
  }

  // We 'support' these types up to bitcast/load/store level, regardless of
  // MVE integer-only / float support. Only FP data processing on the FP
  // vector types is inhibited at the integer-only level.
  const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
  for (auto VT : LongTypes) {
    addRegisterClass(VT, &ARM::QPRRegClass);
    setAllExpand(VT);
    // ...
  }
  // We can do bitwise operations on v2i64 vectors.
  // ...

  // It is legal to extload from v4i8 to v4i16 or v4i32.
  addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
  addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
  addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);

  // Some truncating stores are legal too.
  // ...
}

ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
                                     const ARMSubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  RegInfo = Subtarget->getRegisterInfo();
  Itins = Subtarget->getInstrItineraryData();

  // ...

  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
      !Subtarget->isTargetWatchOS()) {
    bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
    for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
      setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
                            IsHFTarget ? CallingConv::ARM_AAPCS_VFP
                                       : CallingConv::ARM_AAPCS);
  }

  if (Subtarget->isTargetMachO()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
        Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
      static const struct {
        const RTLIB::Libcall Op;
        const char * const Name;
        const ISD::CondCode Cond;
      } LibraryCalls[] = {
        // Single-precision floating-point arithmetic.
        { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
        { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
        { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
        { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },

        // Double-precision floating-point arithmetic.
        { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
        { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
        { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
        { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },

        // Single-precision comparisons.
        { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
        { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
        { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
        { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
        { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
        { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
        { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
        { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },

        // Double-precision comparisons.
        { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
        { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
        { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
        { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
        { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
        { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
        { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
        { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },

        // Floating-point to integer conversions.
        // i64 conversions are done via library routines even when generating VFP
        // instructions, so use the same ones.
        { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
        { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
        { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
        { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },

        // Conversions between floating types.
        { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
        { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },

        // Integer to floating-point conversions.
        // i64 conversions are done via library routines even when generating VFP
        // instructions, so use the same ones.
        // FIXME: There appears to be some naming inconsistency in ARM libgcc:
        // e.g., __floatunsidf vs. __floatunssidfvfp.
        { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
        { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
        { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
        { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
      };

      for (const auto &LC : LibraryCalls) {
        setLibcallName(LC.Op, LC.Name);
        if (LC.Cond != ISD::SETCC_INVALID)
          setCmpLibcallCC(LC.Op, LC.Cond);
      }
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, nullptr);
  setLibcallName(RTLIB::SRL_I128, nullptr);
  setLibcallName(RTLIB::SRA_I128, nullptr);

  // RTLIB
  if (Subtarget->isAAPCS_ABI() &&
      (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
       Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
    static const struct {
      const RTLIB::Libcall Op;
      const char * const Name;
      const CallingConv::ID CC;
      const ISD::CondCode Cond;
    } LibraryCalls[] = {
      // Double-precision floating-point arithmetic helper functions
      // RTABI chapter 4.1.2, Table 2
      { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Double-precision floating-point comparison helper functions
      // RTABI chapter 4.1.2, Table 3
      { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
      { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },

      // Single-precision floating-point arithmetic helper functions
      // RTABI chapter 4.1.2, Table 4
      { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Single-precision floating-point comparison helper functions
      // RTABI chapter 4.1.2, Table 5
      { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
      { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },

      // Floating-point to integer conversions.
      // RTABI chapter 4.1.2, Table 6
      { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Conversions between floating types.
      // RTABI chapter 4.1.2, Table 7
      { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Integer to floating-point conversions.
      // RTABI chapter 4.1.2, Table 8
      { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Long long helper functions
      // RTABI chapter 4.2, Table 9
      { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Integer division functions
      // RTABI chapter 4.3.1
      { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
    };

    for (const auto &LC : LibraryCalls) {
      setLibcallName(LC.Op, LC.Name);
      setLibcallCallingConv(LC.Op, LC.CC);
      if (LC.Cond != ISD::SETCC_INVALID)
        setCmpLibcallCC(LC.Op, LC.Cond);
    }

    // EABI dependent RTLIB
    if (TM.Options.EABIVersion == EABI::EABI4 ||
        TM.Options.EABIVersion == EABI::EABI5) {
      static const struct {
        const RTLIB::Libcall Op;
        const char *const Name;
        const CallingConv::ID CC;
        const ISD::CondCode Cond;
      } MemOpsLibraryCalls[] = {
        // Memory operations
        // RTABI chapter 4.3.4
        { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
        { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
        { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      };

      for (const auto &LC : MemOpsLibraryCalls) {
        setLibcallName(LC.Op, LC.Name);
        setLibcallCallingConv(LC.Op, LC.CC);
        if (LC.Cond != ISD::SETCC_INVALID)
          setCmpLibcallCC(LC.Op, LC.Cond);
      }
    }
  }

  if (Subtarget->isTargetWindows()) {
    static const struct {
      const RTLIB::Libcall Op;
      const char * const Name;
      const CallingConv::ID CC;
    } LibraryCalls[] = {
      { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
    };

    for (const auto &LC : LibraryCalls) {
      setLibcallName(LC.Op, LC.Name);
      setLibcallCallingConv(LC.Op, LC.CC);
    }
  }

  // Use divmod compiler-rt calls for iOS 5.0 and later.
  if (Subtarget->isTargetMachO() &&
      !(Subtarget->isTargetIOS() &&
        Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
    setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
    setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
  }

  // The half <-> float conversion functions are always soft-float on
  // non-WatchOS platforms, but are needed for some targets which use a
  // hard-float calling convention by default.
  if (!Subtarget->isTargetWatchABI()) {
    if (Subtarget->isAAPCS_ABI()) {
      setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
      setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
      setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
    } else {
      setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
      setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
      setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
    }
  }

  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
  // a __gnu_ prefix (which is the default).
  if (Subtarget->isTargetAEABI()) {
    static const struct {
      const RTLIB::Libcall Op;
      const char * const Name;
      const CallingConv::ID CC;
    } LibraryCalls[] = {
      { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
      { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
      { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
    };

    for (const auto &LC : LibraryCalls) {
      setLibcallName(LC.Op, LC.Name);
      setLibcallCallingConv(LC.Op, LC.CC);
    }
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
  else
    addRegisterClass(MVT::i32, &ARM::GPRRegClass);

  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
      Subtarget->hasFPRegs()) {
    addRegisterClass(MVT::f32, &ARM::SPRRegClass);
    addRegisterClass(MVT::f64, &ARM::DPRRegClass);
    if (!Subtarget->hasVFP2Base())
      setAllExpand(MVT::f32);
    if (!Subtarget->hasFP64())
      setAllExpand(MVT::f64);
  }

  if (Subtarget->hasFullFP16()) {
    addRegisterClass(MVT::f16, &ARM::HPRRegClass);
    // ...
  }

  for (MVT VT : MVT::vector_valuetypes()) {
    for (MVT InnerVT : MVT::vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      addAllExtLoads(VT, InnerVT, Expand);
    }

    // ...
  }

  // ...

  if (Subtarget->hasMVEIntegerOps())
    addMVEVectorTypes(Subtarget->hasMVEFloatOps());

  // Combine low-overhead loop intrinsics so that we can lower i1 types.
  if (Subtarget->hasLOB())
    // ...

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    if (Subtarget->hasFullFP16()) {
      addQRTypeForNEON(MVT::v8f16);
      addDRTypeForNEON(MVT::v4f16);
    }
  }

  if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // none of Neon, MVE or VFP supports any arithmetic operations on it.
    // ...
    // FIXME: Code duplication: FDIV and FREM are expanded always, see
    // ARMTargetLowering::addTypeForNEON method for details.
    // ...
    // FIXME: Create unittest.
    // In other words, find a way to handle "copysign" when it appears in the
    // DAG with vector operands.
    // ...
    // FIXME: Code duplication: SETCC has custom operation action, see
    // ARMTargetLowering::addTypeForNEON method for details.
    // ...
    // FIXME: Create unittest for FNEG and for FABS.
    // ...
    // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
    // ...
  }

  if (Subtarget->hasNEON()) {
    // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
    // supported for v4f32.
    // ...

    // Mark v2f32 intrinsics.
    // ...

    // Neon does not support some operations on v1i64 and v2i64 types.
    // ...
    // Custom handling for some quad-vector types to detect VMULL.
    // ...
    // Custom handling for some vector types to avoid expensive expansions
    // ...
    // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
    // a destination type that is wider than the source, nor does
    // it have a FP_TO_[SU]INT instruction with a narrower destination than
    // source.
    // ...

    // NEON does not have single instruction CTPOP for vectors with element
    // types wider than 8-bits. However, custom lowering can leverage the
    // v8i8/v16i8 vcnt instruction.
    // ...

    // NEON does not have single instruction CTTZ for vectors.
    // ...

    // NEON only has FMA instructions as of VFP4.
    if (!Subtarget->hasVFP4Base()) {
      // ...
    }

    // ...

    // It is legal to extload from v4i8 to v4i16 or v4i32.
    for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v4i16, MVT::v2i8, MVT::v2i16,
                   MVT::v2i32}) {
      // ...
    }
  }

  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
    // ...
  }

  if (!Subtarget->hasFP64()) {
    // When targeting a floating-point unit with only single-precision
    // operations, f64 is legal for the few double-precision instructions
    // which are present. However, no double-precision operations other than
    // moves, loads and stores are provided by the hardware.
    // ...
  }

  if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
    // ...
  }

  if (!Subtarget->hasFP16())
    // ...

  if (!Subtarget->hasFP64())
    // ...

  // ...

  // ARM does not have floating-point extending loads.
  for (MVT VT : MVT::fp_valuetypes()) {
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
  }

  // ... or truncating stores
  // ...

  // ARM does not have i1 sign extending load.
  for (MVT VT : MVT::integer_valuetypes())
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      // ...
    }
  } else {
    // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
    setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
    setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
  }

  // ...

  // i64 operation support.
  // ...
  if (Subtarget->isThumb1Only()) {
    // ...
  }
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
      || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
    setOperationAction(ISD::MULHS, MVT::i32, Expand);

  // ...

  // MVE lowers 64-bit shifts to lsll and lsrl, assuming that ISD::SRL and
  // SRA of i64 are already marked custom.
  if (Subtarget->hasMVEIntegerOps())
    setOperationAction(ISD::SHL, MVT::i64, Custom);

  // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
  if (Subtarget->isThumb1Only()) {
    // ...
  }

  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
    setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
  for (MVT VT : MVT::vector_valuetypes()) {
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
  }
  // ...
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
    // ...
  }

  // @llvm.readcyclecounter requires the Performance Monitors extension.
  // Default to the 0 expansion on unsupported platforms.
  // FIXME: Technically there are older ARM CPUs that have
  // implementation-specific ways of obtaining this information.
  if (Subtarget->hasPerfMon())
    setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
                                        : Subtarget->hasDivideInARMMode();
  if (!hasDivide) {
    // These are expanded into libcalls if the CPU doesn't have a HW divider.
    setOperationAction(ISD::SDIV, MVT::i32, LibCall);
    setOperationAction(ISD::UDIV, MVT::i32, LibCall);
  }

  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);

    setOperationAction(ISD::SDIV, MVT::i64, Custom);
    setOperationAction(ISD::UDIV, MVT::i64, Custom);
  }

  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);

  // Register based DivRem for AEABI (RTABI 4.2)
  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
      Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
      Subtarget->isTargetWindows()) {
    setOperationAction(ISD::SREM, MVT::i64, Custom);
    setOperationAction(ISD::UREM, MVT::i64, Custom);
    HasStandaloneRem = false;

    if (Subtarget->isTargetWindows()) {
      const struct {
        const RTLIB::Libcall Op;
        const char * const Name;
        const CallingConv::ID CC;
      } LibraryCalls[] = {
        { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
        { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
        { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
        { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },

        { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
        { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
        { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
        { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
      };

      for (const auto &LC : LibraryCalls) {
        setLibcallName(LC.Op, LC.Name);
        setLibcallCallingConv(LC.Op, LC.CC);
      }
    } else {
      const struct {
        const RTLIB::Libcall Op;
        const char * const Name;
        const CallingConv::ID CC;
      } LibraryCalls[] = {
        { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
        { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
        { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
        { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },

        { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
        { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
        { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
        { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
      };

      for (const auto &LC : LibraryCalls) {
        setLibcallName(LC.Op, LC.Name);
        setLibcallCallingConv(LC.Op, LC.CC);
      }
    }

    setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
    setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
    setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
    setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
  } else {
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  }

  if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
    for (auto &VT : {MVT::f32, MVT::f64})
      // ...

  // ...

  // Use the default implementation.
  // ...

  if (Subtarget->isTargetWindows())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
  // the default expansion.
  InsertFencesForAtomic = false;
  if (Subtarget->hasAnyDataBarrier() &&
      (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
    // ATOMIC_FENCE needs custom lowering; the others should have been expanded
    // to ldrex/strex loops already.
    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
    if (!Subtarget->isThumb() || !Subtarget->isMClass())
      setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);

    // On v8, we have particularly efficient implementations of atomic fences
    // if they can be combined with nearby atomic loads and stores.
    if (!Subtarget->hasAcquireRelease() ||
        getTargetMachine().getOptLevel() == 0) {
      // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
      InsertFencesForAtomic = true;
    }
  } else {
    // If there's anything we can use as a barrier, go through custom lowering
    // for ATOMIC_FENCE.
    // If target has DMB in thumb, Fences can be inserted.
    if (Subtarget->hasDataBarrier())
      InsertFencesForAtomic = true;

    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
                       Subtarget->hasAnyDataBarrier() ? Custom : Expand);

    // Set them all for expansion, which will force libcalls.
    // ...
    // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
    // Unordered/Monotonic case.
    if (!InsertFencesForAtomic) {
      setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
      setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
    }
  }

  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
      !Subtarget->isThumb1Only()) {
    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
    // iff target supports vfp2.
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  }

  // We want to custom lower some of our intrinsics.
  // ...
  if (Subtarget->useSjLjEH())
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");

  // ...
  if (Subtarget->hasFullFP16()) {
    // ...
  }

  // ...
  if (Subtarget->hasFullFP16())
    // ...

  // We don't support sin/cos/fmod/copysign/pow
  // ...
  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
      !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  // ...

  if (!Subtarget->hasVFP4Base()) {
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Expand);
  }

  // Various VFP goodness
  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
    // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
    if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
      setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
      setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
    }

    // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
      setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
    }
  }

  // Use __sincos_stret if available.
  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
      getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
    setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
    setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
  }

  // FP-ARMv8 implements a lot of rounding-like FP operations.
  if (Subtarget->hasFPARMv8Base()) {
    // ...
    if (Subtarget->hasNEON()) {
      // ...
    }

    if (Subtarget->hasFP64()) {
      // ...
    }
  }

  // FP16 often needs to be promoted to call lib functions.
  if (Subtarget->hasFullFP16()) {
    // ...
  }

  if (Subtarget->hasNEON()) {
    // vmin and vmax aren't available in a scalar form, so we use
    // a NEON instruction with an undef lane instead.
    // ...

    if (Subtarget->hasFullFP16()) {
      // ...
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
  // ...

  if (Subtarget->hasV6Ops())
    // ...
  if (Subtarget->isThumb1Only())
    // ...

  // ...

  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
      !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  //// temporary - rewrite interface to use type
  // ...
  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
  // ...
  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
  // ...

  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  // Prefer likely predicted branches to selects on out-of-order cores.
  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();

  // ...

  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);

  if (Subtarget->isThumb() || Subtarget->isThumb2())
    // ...
}

bool ARMTargetLowering::useSoftFloat() const {
  return Subtarget->useSoftFloat();
}

// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
// SPR's representative would be DPR_VFP2. This should work well if register
// pressure tracking were modified such that a register use would increment the
// pressure of the register class's representative and all of its super
// classes' representatives transitively. We have not implemented this because
// of the difficulty prior to coalescing of modeling operand register classes
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass *, uint8_t>
ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
                                           MVT VT) const {
  const TargetRegisterClass *RRC = nullptr;
  uint8_t Cost = 1;
  switch (VT.SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(TRI, VT);
  // Use DPR as representative register class for all floating point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = &ARM::DPRRegClass;
    // When NEON is used for SP, only half of the register file is available
    // because operations that define both SP and DP results will be constrained
    // to the VFP2 class (D0-D15). We currently model this constraint prior to
    // coalescing by double-counting the SP regs. See the FIXME above.
    if (Subtarget->useNEONForSinglePrecisionFP())
      Cost = 2;
    break;
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = &ARM::DPRRegClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    RRC = &ARM::DPRRegClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    RRC = &ARM::DPRRegClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((ARMISD::NodeType)Opcode) {
  case ARMISD::FIRST_NUMBER: break;
  case ARMISD::Wrapper: return "ARMISD::Wrapper";
  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
  case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
  case ARMISD::CALL: return "ARMISD::CALL";
  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
  case ARMISD::BRCOND: return "ARMISD::BRCOND";
  case ARMISD::BR_JT: return "ARMISD::BR_JT";
  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
  case ARMISD::CMP: return "ARMISD::CMP";
  case ARMISD::CMN: return "ARMISD::CMN";
  case ARMISD::CMPZ: return "ARMISD::CMPZ";
  case ARMISD::CMPFP: return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";

  case ARMISD::CMOV: return "ARMISD::CMOV";
  case ARMISD::SUBS: return "ARMISD::SUBS";

  case ARMISD::SSAT: return "ARMISD::SSAT";
  case ARMISD::USAT: return "ARMISD::USAT";

  case ARMISD::ASRL: return "ARMISD::ASRL";
  case ARMISD::LSRL: return "ARMISD::LSRL";
  case ARMISD::LSLL: return "ARMISD::LSLL";

  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
  case ARMISD::RRX: return "ARMISD::RRX";

  case ARMISD::ADDC: return "ARMISD::ADDC";
  case ARMISD::ADDE: return "ARMISD::ADDE";
  case ARMISD::SUBC: return "ARMISD::SUBC";
  case ARMISD::SUBE: return "ARMISD::SUBE";

  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
  case ARMISD::VMOVhr: return "ARMISD::VMOVhr";
  case ARMISD::VMOVrh: return "ARMISD::VMOVrh";
  case ARMISD::VMOVSR: return "ARMISD::VMOVSR";

  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
  case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";

  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";

  case ARMISD::THREAD_POINTER: return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";

  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";

  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
  case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";

  case ARMISD::VCEQ: return "ARMISD::VCEQ";
  case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
  case ARMISD::VCGE: return "ARMISD::VCGE";
  case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
  case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
  case ARMISD::VCGEU: return "ARMISD::VCGEU";
  case ARMISD::VCGT: return "ARMISD::VCGT";
  case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
  case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
  case ARMISD::VCGTU: return "ARMISD::VCGTU";
  case ARMISD::VTST: return "ARMISD::VTST";

  case ARMISD::VSHLs: return "ARMISD::VSHLs";
  case ARMISD::VSHLu: return "ARMISD::VSHLu";
  case ARMISD::VSHLIMM: return "ARMISD::VSHLIMM";
  case ARMISD::VSHRsIMM: return "ARMISD::VSHRsIMM";
  case ARMISD::VSHRuIMM: return "ARMISD::VSHRuIMM";
  case ARMISD::VRSHRsIMM: return "ARMISD::VRSHRsIMM";
  case ARMISD::VRSHRuIMM: return "ARMISD::VRSHRuIMM";
  case ARMISD::VRSHRNIMM: return "ARMISD::VRSHRNIMM";
  case ARMISD::VQSHLsIMM: return "ARMISD::VQSHLsIMM";
  case ARMISD::VQSHLuIMM: return "ARMISD::VQSHLuIMM";
  case ARMISD::VQSHLsuIMM: return "ARMISD::VQSHLsuIMM";
  case ARMISD::VQSHRNsIMM: return "ARMISD::VQSHRNsIMM";
  case ARMISD::VQSHRNuIMM: return "ARMISD::VQSHRNuIMM";
  case ARMISD::VQSHRNsuIMM: return "ARMISD::VQSHRNsuIMM";
  case ARMISD::VQRSHRNsIMM: return "ARMISD::VQRSHRNsIMM";
  case ARMISD::VQRSHRNuIMM: return "ARMISD::VQRSHRNuIMM";
  case ARMISD::VQRSHRNsuIMM: return "ARMISD::VQRSHRNsuIMM";
  case ARMISD::VSLIIMM: return "ARMISD::VSLIIMM";
  case ARMISD::VSRIIMM: return "ARMISD::VSRIIMM";
  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
  case ARMISD::VDUP: return "ARMISD::VDUP";
  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
  case ARMISD::VEXT: return "ARMISD::VEXT";
  case ARMISD::VREV64: return "ARMISD::VREV64";
  case ARMISD::VREV32: return "ARMISD::VREV32";
  case ARMISD::VREV16: return "ARMISD::VREV16";
  case ARMISD::VZIP: return "ARMISD::VZIP";
  case ARMISD::VUZP: return "ARMISD::VUZP";
  case ARMISD::VTRN: return "ARMISD::VTRN";
  case ARMISD::VTBL1: return "ARMISD::VTBL1";
  case ARMISD::VTBL2: return "ARMISD::VTBL2";
  case ARMISD::VMULLs: return "ARMISD::VMULLs";
  case ARMISD::VMULLu: return "ARMISD::VMULLu";
  case ARMISD::UMAAL: return "ARMISD::UMAAL";
  case ARMISD::UMLAL: return "ARMISD::UMLAL";
  case ARMISD::SMLAL: return "ARMISD::SMLAL";
  case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
  case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
  case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
  case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
  case ARMISD::SMULWB: return "ARMISD::SMULWB";
  case ARMISD::SMULWT: return "ARMISD::SMULWT";
  case ARMISD::SMLALD: return "ARMISD::SMLALD";
  case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
  case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
  case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
  case ARMISD::SMMLAR: return "ARMISD::SMMLAR";
  case ARMISD::SMMLSR: return "ARMISD::SMMLSR";
  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
  case ARMISD::BFI: return "ARMISD::BFI";
  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
  case ARMISD::VBSL: return "ARMISD::VBSL";
  case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
  case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
  case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
  case ARMISD::WLS: return "ARMISD::WLS";
  }
  return nullptr;
}

EVT ARMTargetLowering::getSetccResultType(const DataLayout &DL, LLVMContext &,
                                          EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  return VT.changeVectorElementTypeToInteger();
}

/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
const TargetRegisterClass *
ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
  (void)isDivergent;
  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
  // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
  // MVE Q registers.
  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
    if (VT == MVT::v4i64)
      return &ARM::QQPRRegClass;
    if (VT == MVT::v8i64)
      return &ARM::QQQQPRRegClass;
  }
  return TargetLowering::getRegClassFor(VT);
}

// memcpy, and other memory intrinsics, typically try to use LDM/STM if the
// source/dest is aligned and the copy size is large enough. We therefore want
// to align such objects passed to memory intrinsics.
bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
                                               unsigned &PrefAlign) const {
  if (!isa<MemIntrinsic>(CI))
    return false;
  MinSize = 8;
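  // Objects smaller than 8 bytes cannot use an LDM/STM pair anyway, so they
  // gain nothing from extra alignment.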
  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
  // cycle faster than 4-byte aligned LDM.
  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
  return true;
}

// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                  const TargetLibraryInfo *libInfo) const {
  return ARM::createFastISel(funcInfo, libInfo);
}

Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  unsigned NumVals = N->getNumValues();
  if (!NumVals)
    return Sched::RegPressure;

  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue || VT == MVT::Other)
      continue;
    if (VT.isFloatingPoint() || VT.isVector())
      return Sched::ILP;
  }

  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if the instruction itinerary
  // is not available.
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());

  if (MCID.getNumDefs() == 0)
    return Sched::RegPressure;
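  // A first-def operand cycle above 2 indicates a high-latency instruction
  // (such as a load), so schedule for ILP to help hide that latency.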
  if (!Itins->isEmpty() &&
      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
    return Sched::ILP;

  return Sched::RegPressure;
}

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

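// Helpers for matching shifts by a constant 16; together with isS16 below,
// they are used to recognise the high/low halves of 32-bit values when
// forming the 16-bit multiply patterns.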
static bool isSRL16(const SDValue &Op) {
  if (Op.getOpcode() != ISD::SRL)
    return false;
  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
    return Const->getZExtValue() == 16;
  return false;
}

static bool isSRA16(const SDValue &Op) {
  if (Op.getOpcode() != ISD::SRA)
    return false;
  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
    return Const->getZExtValue() == 16;
  return false;
}

static bool isSHL16(const SDValue &Op) {
  if (Op.getOpcode() != ISD::SHL)
    return false;
  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
    return Const->getZExtValue() == 16;
  return false;
}

// Check for a signed 16-bit value. We special case SRA because it makes it
// simpler when also looking for SRAs that aren't sign extending a
// smaller value. Without the check, we'd need to take extra care with
// checking order for some operations.
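// Note that ComputeNumSignBits(Op) == 17 means exactly the top 17 bits of an
// i32 are sign-bit copies, i.e. the value is a sign-extended 16-bit quantity.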
static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
  if (isSRA16(Op))
    return isSHL16(Op.getOperand(0));
  return DAG.ComputeNumSignBits(Op) == 17;
}

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE: return ARMCC::NE;
  case ISD::SETEQ: return ARMCC::EQ;
  case ISD::SETGT: return ARMCC::GT;
  case ISD::SETGE: return ARMCC::GE;
  case ISD::SETLT: return ARMCC::LT;
  case ISD::SETLE: return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}

/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
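/// When a single ARM condition cannot express the FP predicate, the second
/// condition is returned in CondCode2 (ARMCC::AL there means "unused"), and
/// callers emit the conditional operation once per condition.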
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
  CondCode2 = ARMCC::AL;
  InvalidOnQNaN = true;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ:
    CondCode = ARMCC::EQ;
    InvalidOnQNaN = false;
    break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE:
    CondCode = ARMCC::MI;
    CondCode2 = ARMCC::GT;
    InvalidOnQNaN = false;
    break;
  case ISD::SETO: CondCode = ARMCC::VC; break;
  case ISD::SETUO: CondCode = ARMCC::VS; break;
  case ISD::SETUEQ:
    CondCode = ARMCC::EQ;
    CondCode2 = ARMCC::VS;
    InvalidOnQNaN = false;
    break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE:
    CondCode = ARMCC::NE;
    InvalidOnQNaN = false;
    break;
  }
}

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

/// getEffectiveCallingConv - Get the effective calling convention, taking into
/// account presence of floating point hardware and calling convention
/// limitations, such as support for variadic functions.
CallingConv::ID
ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
                                           bool isVarArg) const {
  switch (CC) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::ARM_AAPCS:
  case CallingConv::ARM_APCS:
  case CallingConv::GHC:
    return CC;
  case CallingConv::PreserveMost:
    return CallingConv::PreserveMost;
  case CallingConv::ARM_AAPCS_VFP:
  case CallingConv::Swift:
    return CallingConv::ARM_AAPCS_VFP;
  case CallingConv::C:
    if (!Subtarget->isAAPCS_ABI())
      return CallingConv::ARM_APCS;
    else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
             getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
             !isVarArg)
      return CallingConv::ARM_AAPCS_VFP;
    else
      return CallingConv::ARM_AAPCS;
  case CallingConv::Fast:
  case CallingConv::CXX_FAST_TLS:
    if (!Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
        return CallingConv::Fast;
      return CallingConv::ARM_APCS;
    } else if (Subtarget->hasVFP2Base() &&
               !Subtarget->isThumb1Only() && !isVarArg)
      return CallingConv::ARM_AAPCS_VFP;
    else
      return CallingConv::ARM_AAPCS;
  }
}

CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                 bool isVarArg) const {
  return CCAssignFnForNode(CC, false, isVarArg);
}

CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
                                                   bool isVarArg) const {
  return CCAssignFnForNode(CC, true, isVarArg);
}

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (getEffectiveCallingConv(CC, isVarArg)) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
  case CallingConv::Fast:
    return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
  case CallingConv::GHC:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
  case CallingConv::PreserveMost:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  }
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue ARMTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
    SDValue ThisVal) const {
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    // Pass 'this' value directly from the argument to return value, to avoid
    // reg unit interference
    if (i == 0 && isThisReturn) {
      assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
             "unexpected return calling convention register assignment");
      InVals.push_back(ThisVal);
      continue;
    }

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
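      // An f64 result comes back as two glued i32 register copies and is
      // reassembled into a double with VMOVDRR below.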
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      if (!Subtarget->isLittle())
        std::swap (Lo, Hi);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, dl, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        if (!Subtarget->isLittle())
          std::swap (Lo, Hi);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, dl, MVT::i32));
      }
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

/// LowerMemOpCallTo - Store the argument to the stack.
SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
                                            SDValue Arg, const SDLoc &dl,
                                            SelectionDAG &DAG,
                                            const CCValAssign &VA,
                                            ISD::ArgFlagsTy Flags) const {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
                       StackPtr, PtrOff);
  return DAG.getStore(
      Chain, dl, Arg, PtrOff,
      MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
}

void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVectorImpl<SDValue> &MemOpChains,
                                         ISD::ArgFlagsTy Flags) const {
  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  unsigned id = Subtarget->isLittle() ? 0 : 1;
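  // VMOVRRD splits the f64 into two i32 results, low word first; on
  // big-endian targets the high word goes in the first register, hence the
  // swapped index in 'id'.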
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
  else {
    assert(NextVA.isMemLoc());
    if (!StackPtr.getNode())
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
                                    getPointerTy(DAG.getDataLayout()));

    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
                                           dl, DAG, NextVA,
                                           Flags));
  }
}

/// LowerCall - Lowering a call into a callseq_start <-
/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool doesNotRet = CLI.DoesNotReturn;
  bool isVarArg = CLI.IsVarArg;

  MachineFunction &MF = DAG.getMachineFunction();
  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
  bool isThisReturn = false;
  auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
  bool PreferIndirect = false;

  // Disable tail calls if they're not supported.
  if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
    isTailCall = false;

  if (isa<GlobalAddressSDNode>(Callee)) {
    // If we're optimizing for minimum size and the function is called three or
    // more times in this block, we can improve codesize by calling indirectly
    // as BLXr has a 16-bit encoding.
    auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
    if (CLI.CS) {
      auto *BB = CLI.CS.getParent();
      PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
                       count_if(GV->users(), [&BB](const User *U) {
                         return isa<Instruction>(U) &&
                                cast<Instruction>(U)->getParent() == BB;
                       }) > 2;
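      // count_if(...) > 2 requires at least three uses of the callee in this
      // basic block, matching the "three or more times" heuristic above.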
2012 }
2013 }
2014 if (isTailCall) {
2015 // Check if it's really possible to do a tail call.
2016 isTailCall = IsEligibleForTailCallOptimization(
2017 Callee, CallConv, isVarArg, isStructRet,
2018 MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2019 PreferIndirect);
2020 if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
2021 report_fatal_error("failed to perform tail call elimination on a call "
2022 "site marked musttail");
2023 // We don't support GuaranteedTailCallOpt for ARM, only automatically
2024 // detected sibcalls.
2025 if (isTailCall)
2026 ++NumTailCalls;
2027 }
2028
2029 // Analyze operands of the call, assigning locations to each operand.
 2030 SmallVector<CCValAssign, 16> ArgLocs;
 2031 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2032 *DAG.getContext());
2033 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2034
2035 // Get a count of how many bytes are to be pushed on the stack.
2036 unsigned NumBytes = CCInfo.getNextStackOffset();
2037
2038 if (isTailCall) {
2039 // For tail calls, memory operands are available in our caller's stack.
2040 NumBytes = 0;
2041 } else {
2042 // Adjust the stack pointer for the new arguments...
2043 // These operations are automatically eliminated by the prolog/epilog pass
2044 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
2045 }
2046
 2047 SDValue StackPtr =
 2048 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2049
2050 RegsToPassVector RegsToPass;
2051 SmallVector<SDValue, 8> MemOpChains;
2052
2053 // Walk the register/memloc assignments, inserting copies/loads. In the case
2054 // of tail call optimization, arguments are handled later.
2055 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2056 i != e;
2057 ++i, ++realArgIdx) {
2058 CCValAssign &VA = ArgLocs[i];
2059 SDValue Arg = OutVals[realArgIdx];
2060 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2061 bool isByVal = Flags.isByVal();
2062
2063 // Promote the value if needed.
2064 switch (VA.getLocInfo()) {
2065 default: llvm_unreachable("Unknown loc info!");
2066 case CCValAssign::Full: break;
2067 case CCValAssign::SExt:
2068 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2069 break;
2070 case CCValAssign::ZExt:
2071 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2072 break;
2073 case CCValAssign::AExt:
2074 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2075 break;
2076 case CCValAssign::BCvt:
2077 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2078 break;
2079 }
2080
2081 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2082 if (VA.needsCustom()) {
2083 if (VA.getLocVT() == MVT::v2f64) {
 2084 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
 2085 DAG.getConstant(0, dl, MVT::i32));
 2086 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
 2087 DAG.getConstant(1, dl, MVT::i32));
2088
2089 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
2090 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
2091
2092 VA = ArgLocs[++i]; // skip ahead to next loc
2093 if (VA.isRegLoc()) {
2094 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
2095 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
2096 } else {
2097 assert(VA.isMemLoc());
2098
2099 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
2100 dl, DAG, VA, Flags));
2101 }
2102 } else {
2103 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2104 StackPtr, MemOpChains, Flags);
2105 }
2106 } else if (VA.isRegLoc()) {
2107 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2108 Outs[0].VT == MVT::i32) {
2109 assert(VA.getLocVT() == MVT::i32 &&
2110 "unexpected calling convention register assignment");
2111 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2112 "unexpected use of 'returned'");
2113 isThisReturn = true;
2114 }
2115 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2116 } else if (isByVal) {
2117 assert(VA.isMemLoc());
2118 unsigned offset = 0;
2119
2120 // True if this byval aggregate will be split between registers
2121 // and memory.
2122 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2123 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2124
2125 if (CurByValIdx < ByValArgsCount) {
2126
2127 unsigned RegBegin, RegEnd;
2128 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2129
2130 EVT PtrVT =
 2131 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
 2132 unsigned int i, j;
2133 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2134 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2135 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2136 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
 2137 MachinePointerInfo(),
 2138 DAG.InferPtrAlignment(AddArg));
2139 MemOpChains.push_back(Load.getValue(1));
2140 RegsToPass.push_back(std::make_pair(j, Load));
2141 }
2142
 2143 // If the parameter size extends beyond the register area, the "offset"
 2144 // value lets us compute the stack slot for the remaining part correctly.
2145 offset = RegEnd - RegBegin;
2146
2147 CCInfo.nextInRegsParam();
2148 }
2149
2150 if (Flags.getByValSize() > 4*offset) {
2151 auto PtrVT = getPointerTy(DAG.getDataLayout());
2152 unsigned LocMemOffset = VA.getLocMemOffset();
2153 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
2154 SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
2155 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2156 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2157 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2158 MVT::i32);
2159 SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
2160 MVT::i32);
2161
 2162 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
 2163 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2164 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2165 Ops));
2166 }
2167 } else if (!isTailCall) {
2168 assert(VA.isMemLoc());
2169
2170 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2171 dl, DAG, VA, Flags));
2172 }
2173 }
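// Worked example for the byval path above (illustrative): a 20-byte byval
// struct assigned RegBegin = r2 has its first 8 bytes loaded into r2 and r3
// by the loop, and the remaining 12 bytes are copied into the outgoing
// argument area by the ARMISD::COPY_STRUCT_BYVAL pseudo.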
2174
2175 if (!MemOpChains.empty())
2176 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2177
2178 // Build a sequence of copy-to-reg nodes chained together with token chain
2179 // and flag operands which copy the outgoing args into the appropriate regs.
2180 SDValue InFlag;
2181 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2182 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2183 RegsToPass[i].second, InFlag);
2184 InFlag = Chain.getValue(1);
2185 }
2186
2187 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2188 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2189 // node so that legalize doesn't hack it.
2190 bool isDirect = false;
2191
 2192 const TargetMachine &TM = getTargetMachine();
 2193 const Module *Mod = MF.getFunction().getParent();
2194 const GlobalValue *GV = nullptr;
2195 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2196 GV = G->getGlobal();
2197 bool isStub =
2198 !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2199
2200 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2201 bool isLocalARMFunc = false;
 2202 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 2203 auto PtrVt = getPointerTy(DAG.getDataLayout());
2204
2205 if (Subtarget->genLongCalls()) {
2206 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2207 "long-calls codegen is not position independent!");
2208 // Handle a global address or an external symbol. If it's not one of
2209 // those, the target's already in a register, so we don't need to do
2210 // anything extra.
2211 if (isa<GlobalAddressSDNode>(Callee)) {
2212 // Create a constant pool entry for the callee address
2213 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
 2214 ARMConstantPoolValue *CPV =
 2215 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2216
2217 // Get the address of the callee into a register
2218 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2219 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2220 Callee = DAG.getLoad(
2221 PtrVt, dl, DAG.getEntryNode(), CPAddr,
 2222 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 2223 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2224 const char *Sym = S->getSymbol();
2225
2226 // Create a constant pool entry for the callee address
2227 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
 2228 ARMConstantPoolValue *CPV =
 2229 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
 2230 ARMPCLabelIndex, 0);
2231 // Get the address of the callee into a register
2232 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2233 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2234 Callee = DAG.getLoad(
2235 PtrVt, dl, DAG.getEntryNode(), CPAddr,
 2236 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 2237 }
2238 } else if (isa<GlobalAddressSDNode>(Callee)) {
2239 if (!PreferIndirect) {
2240 isDirect = true;
2241 bool isDef = GV->isStrongDefinitionForLinker();
2242
2243 // ARM call to a local ARM function is predicable.
2244 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2245 // tBX takes a register source operand.
2246 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2247 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2248 Callee = DAG.getNode(
2249 ARMISD::WrapperPIC, dl, PtrVt,
2250 DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2251 Callee = DAG.getLoad(
2252 PtrVt, dl, DAG.getEntryNode(), Callee,
 2253 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
 2254 /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
 2255 MachineMemOperand::MOInvariant);
2256 } else if (Subtarget->isTargetCOFF()) {
2257 assert(Subtarget->isTargetWindows() &&
2258 "Windows is the only supported COFF target");
2259 unsigned TargetFlags = GV->hasDLLImportStorageClass()
 2260 ? ARMII::MO_DLLIMPORT
 2261 : ARMII::MO_NO_FLAG;
 2262 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
2263 TargetFlags);
2264 if (GV->hasDLLImportStorageClass())
2265 Callee =
2266 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2267 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
 2268 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
 2269 } else {
2270 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2271 }
2272 }
2273 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2274 isDirect = true;
2275 // tBX takes a register source operand.
2276 const char *Sym = S->getSymbol();
2277 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2278 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
 2279 ARMConstantPoolValue *CPV =
 2280 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
 2281 ARMPCLabelIndex, 4);
2282 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2283 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2284 Callee = DAG.getLoad(
2285 PtrVt, dl, DAG.getEntryNode(), CPAddr,
 2286 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 2287 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2288 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2289 } else {
2290 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2291 }
2292 }
2293
2294 // FIXME: handle tail calls differently.
2295 unsigned CallOpc;
2296 if (Subtarget->isThumb()) {
2297 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2298 CallOpc = ARMISD::CALL_NOLINK;
2299 else
2300 CallOpc = ARMISD::CALL;
2301 } else {
2302 if (!isDirect && !Subtarget->hasV5TOps())
2303 CallOpc = ARMISD::CALL_NOLINK;
2304 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2305 // Emit regular call when code size is the priority
2306 !Subtarget->hasMinSize())
2307 // "mov lr, pc; b _foo" to avoid confusing the RSP
2308 CallOpc = ARMISD::CALL_NOLINK;
2309 else
2310 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2311 }
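// Summary of the opcode choice above (illustrative): calls that cannot use BL
// to set LR (pre-v5T indirect or interworking cases, and the noreturn trick)
// become CALL_NOLINK, ARM calls to local ARM functions become the predicable
// CALL_PRED, and everything else is a plain CALL.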
2312
2313 std::vector<SDValue> Ops;
2314 Ops.push_back(Chain);
2315 Ops.push_back(Callee);
2316
2317 // Add argument registers to the end of the list so that they are known live
2318 // into the call.
2319 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2320 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2321 RegsToPass[i].second.getValueType()));
2322
2323 // Add a register mask operand representing the call-preserved registers.
2324 if (!isTailCall) {
2325 const uint32_t *Mask;
2326 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2327 if (isThisReturn) {
2328 // For 'this' returns, use the R0-preserving mask if applicable
2329 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2330 if (!Mask) {
2331 // Set isThisReturn to false if the calling convention is not one that
2332 // allows 'returned' to be modeled in this way, so LowerCallResult does
2333 // not try to pass 'this' straight through
2334 isThisReturn = false;
2335 Mask = ARI->getCallPreservedMask(MF, CallConv);
2336 }
2337 } else
2338 Mask = ARI->getCallPreservedMask(MF, CallConv);
2339
2340 assert(Mask && "Missing call preserved mask for calling convention");
2341 Ops.push_back(DAG.getRegisterMask(Mask));
2342 }
2343
2344 if (InFlag.getNode())
2345 Ops.push_back(InFlag);
2346
2347 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2348 if (isTailCall) {
 2349 MF.getFrameInfo().setHasTailCall();
 2350 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2351 }
2352
2353 // Returns a chain and a flag for retval copy to use.
2354 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2355 InFlag = Chain.getValue(1);
2356
2357 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2358 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2359 if (!Ins.empty())
2360 InFlag = Chain.getValue(1);
2361
2362 // Handle result values, copying them out of physregs into vregs that we
2363 // return.
2364 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2365 InVals, isThisReturn,
2366 isThisReturn ? OutVals[0] : SDValue());
2367}
2368
2369/// HandleByVal - Every parameter *after* a byval parameter is passed
2370/// on the stack. Remember the next parameter register to allocate,
 2371/// and then confiscate the rest of the parameter registers to ensure
2372/// this.
2373void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2374 unsigned Align) const {
2375 // Byval (as with any stack) slots are always at least 4 byte aligned.
2376 Align = std::max(Align, 4U);
2377
2378 unsigned Reg = State->AllocateReg(GPRArgRegs);
2379 if (!Reg)
2380 return;
2381
2382 unsigned AlignInRegs = Align / 4;
2383 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2384 for (unsigned i = 0; i < Waste; ++i)
2385 Reg = State->AllocateReg(GPRArgRegs);
2386
2387 if (!Reg)
2388 return;
2389
2390 unsigned Excess = 4 * (ARM::R4 - Reg);
2391
 2392 // Special case when NSAA != SP and the parameter size is greater than the
 2393 // size of all remaining GPR regs. In that case we can't split the parameter,
 2394 // we must send it all to the stack. We also must set NCRN to R4, so all
 2395 // remaining registers are wasted.
2396 const unsigned NSAAOffset = State->getNextStackOffset();
2397 if (NSAAOffset != 0 && Size > Excess) {
2398 while (State->AllocateReg(GPRArgRegs))
2399 ;
2400 return;
2401 }
2402
 2403 // The first register for the byval parameter is the first register that
 2404 // wasn't allocated before this method was called, so it is "reg".
 2405 // If the parameter is small enough to fit in the range [reg, r4), the end
 2406 // (first-past-last) register is reg + param-size-in-regs; otherwise the
 2407 // parameter is split between registers and stack, and the end register is
 2408 // r4 in that case.
2409 unsigned ByValRegBegin = Reg;
2410 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2411 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
 2412 // Note: the first register was already allocated at the start of this
 2413 // function, so allocate the remaining registers we need.
2414 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2415 State->AllocateReg(GPRArgRegs);
2416 // A byval parameter that is split between registers and memory needs its
2417 // size truncated here.
2418 // In the case where the entire structure fits in registers, we set the
2419 // size in memory to zero.
2420 Size = std::max<int>(Size - Excess, 0);
2421}
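// Worked example (illustrative): for Size = 20 and Align = 8 with r1 as the
// next free register, AlignInRegs = 2 wastes r1, leaving the span [r2, r4)
// with Excess = 8; assuming NSAA == SP, r2/r3 hold the first 8 bytes and
// Size is truncated to the 12 bytes that remain in memory.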
2422
2423/// MatchingStackOffset - Return true if the given stack call argument is
2424/// already available in the same position (relatively) of the caller's
2425/// incoming argument stack.
2426static
2427bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
 2428 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
 2429 const TargetInstrInfo *TII) {
2430 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2431 int FI = std::numeric_limits<int>::max();
2432 if (Arg.getOpcode() == ISD::CopyFromReg) {
2433 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
 2434 if (!TargetRegisterInfo::isVirtualRegister(VR))
 2435 return false;
2436 MachineInstr *Def = MRI->getVRegDef(VR);
2437 if (!Def)
2438 return false;
2439 if (!Flags.isByVal()) {
2440 if (!TII->isLoadFromStackSlot(*Def, FI))
2441 return false;
2442 } else {
2443 return false;
2444 }
2445 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2446 if (Flags.isByVal())
2447 // ByVal argument is passed in as a pointer but it's now being
2448 // dereferenced. e.g.
2449 // define @foo(%struct.X* %A) {
2450 // tail call @bar(%struct.X* byval %A)
2451 // }
2452 return false;
2453 SDValue Ptr = Ld->getBasePtr();
2454 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2455 if (!FINode)
2456 return false;
2457 FI = FINode->getIndex();
2458 } else
2459 return false;
2460
2461 assert(FI != std::numeric_limits<int>::max());
2462 if (!MFI.isFixedObjectIndex(FI))
2463 return false;
2464 return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2465}
2466
2467/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2468/// for tail call optimization. Targets which want to do tail call
2469/// optimization should implement this function.
2470bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2471 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2472 bool isCalleeStructRet, bool isCallerStructRet,
 2473 const SmallVectorImpl<ISD::OutputArg> &Outs,
 2474 const SmallVectorImpl<SDValue> &OutVals,
 2475 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
 2476 const bool isIndirect) const {
 2477 MachineFunction &MF = DAG.getMachineFunction();
 2478 const Function &CallerF = MF.getFunction();
2479 CallingConv::ID CallerCC = CallerF.getCallingConv();
2480
2481 assert(Subtarget->supportsTailCall());
2482
2483 // Indirect tail calls cannot be optimized for Thumb1 if the args
2484 // to the call take up r0-r3. The reason is that there are no legal registers
2485 // left to hold the pointer to the function to be called.
2486 if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
2487 (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
2488 return false;
2489
2490 // Look for obvious safe cases to perform tail call optimization that do not
2491 // require ABI changes. This is what gcc calls sibcall.
2492
2493 // Exception-handling functions need a special set of instructions to indicate
2494 // a return to the hardware. Tail-calling another function would probably
2495 // break this.
2496 if (CallerF.hasFnAttribute("interrupt"))
2497 return false;
2498
2499 // Also avoid sibcall optimization if either caller or callee uses struct
2500 // return semantics.
2501 if (isCalleeStructRet || isCallerStructRet)
2502 return false;
2503
2504 // Externally-defined functions with weak linkage should not be
2505 // tail-called on ARM when the OS does not support dynamic
2506 // pre-emption of symbols, as the AAELF spec requires normal calls
2507 // to undefined weak functions to be replaced with a NOP or jump to the
2508 // next instruction. The behaviour of branch instructions in this
2509 // situation (as used for tail calls) is implementation-defined, so we
2510 // cannot rely on the linker replacing the tail call with a return.
2511 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2512 const GlobalValue *GV = G->getGlobal();
 2513 const Triple &TT = getTargetMachine().getTargetTriple();
 2514 if (GV->hasExternalWeakLinkage() &&
2515 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2516 return false;
2517 }
2518
2519 // Check that the call results are passed in the same way.
2520 LLVMContext &C = *DAG.getContext();
2521 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2522 CCAssignFnForReturn(CalleeCC, isVarArg),
2523 CCAssignFnForReturn(CallerCC, isVarArg)))
2524 return false;
2525 // The callee has to preserve all registers the caller needs to preserve.
2526 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2527 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2528 if (CalleeCC != CallerCC) {
2529 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2530 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2531 return false;
2532 }
2533
2534 // If Caller's vararg or byval argument has been split between registers and
2535 // stack, do not perform tail call, since part of the argument is in caller's
2536 // local frame.
2537 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2538 if (AFI_Caller->getArgRegsSaveSize())
2539 return false;
2540
2541 // If the callee takes no arguments then go on to check the results of the
2542 // call.
2543 if (!Outs.empty()) {
2544 // Check if stack adjustment is needed. For now, do not do this if any
2545 // argument is passed on the stack.
 2546 SmallVector<CCValAssign, 16> ArgLocs;
 2547 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2548 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2549 if (CCInfo.getNextStackOffset()) {
2550 // Check if the arguments are already laid out in the right way as
2551 // the caller's fixed stack objects.
2552 MachineFrameInfo &MFI = MF.getFrameInfo();
2553 const MachineRegisterInfo *MRI = &MF.getRegInfo();
2554 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2555 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2556 i != e;
2557 ++i, ++realArgIdx) {
2558 CCValAssign &VA = ArgLocs[i];
2559 EVT RegVT = VA.getLocVT();
2560 SDValue Arg = OutVals[realArgIdx];
2561 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
 2562 if (VA.getLocInfo() == CCValAssign::Indirect)
 2563 return false;
2564 if (VA.needsCustom()) {
2565 // f64 and vector types are split into multiple registers or
2566 // register/stack-slot combinations. The types will not match
2567 // the registers; give up on memory f64 refs until we figure
2568 // out what to do about this.
2569 if (!VA.isRegLoc())
2570 return false;
2571 if (!ArgLocs[++i].isRegLoc())
2572 return false;
2573 if (RegVT == MVT::v2f64) {
2574 if (!ArgLocs[++i].isRegLoc())
2575 return false;
2576 if (!ArgLocs[++i].isRegLoc())
2577 return false;
2578 }
2579 } else if (!VA.isRegLoc()) {
2580 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2581 MFI, MRI, TII))
2582 return false;
2583 }
2584 }
2585 }
2586
2587 const MachineRegisterInfo &MRI = MF.getRegInfo();
2588 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2589 return false;
2590 }
2591
2592 return true;
2593}
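// Illustrative IR that passes these checks and lowers to a sibcall, i.e.
// "b callee" rather than "bl callee" (assumed symbol names):
//   define i32 @caller(i32 %x) {
//     %r = tail call i32 @callee(i32 %x)
//     ret i32 %r
//   }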
2594
2595bool
 2596ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
 2597 MachineFunction &MF, bool isVarArg,
 2598 const SmallVectorImpl<ISD::OutputArg> &Outs,
 2599 LLVMContext &Context) const {
 2600 SmallVector<CCValAssign, 16> RVLocs;
 2601 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2602 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2603}
2604
 2605static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
 2606 const SDLoc &DL, SelectionDAG &DAG) {
2607 const MachineFunction &MF = DAG.getMachineFunction();
2608 const Function &F = MF.getFunction();
2609
2610 StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
2611
2612 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2613 // version of the "preferred return address". These offsets affect the return
2614 // instruction if this is a return from PL1 without hypervisor extensions.
2615 // IRQ/FIQ: +4 "subs pc, lr, #4"
2616 // SWI: 0 "subs pc, lr, #0"
2617 // ABORT: +4 "subs pc, lr, #4"
2618 // UNDEF: +4/+2 "subs pc, lr, #0"
 2619 // UNDEF varies depending on whether the exception came from ARM or Thumb
2620 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2621
2622 int64_t LROffset;
2623 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2624 IntKind == "ABORT")
2625 LROffset = 4;
2626 else if (IntKind == "SWI" || IntKind == "UNDEF")
2627 LROffset = 0;
2628 else
2629 report_fatal_error("Unsupported interrupt attribute. If present, value "
2630 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2631
2632 RetOps.insert(RetOps.begin() + 1,
2633 DAG.getConstant(LROffset, DL, MVT::i32, false));
2634
2635 return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2636}
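// Illustrative use (assumed attribute spelling): an IRQ handler defined as
//   define void @handler() #0 { ... }   ; attributes #0 = { "interrupt"="IRQ" }
// returns via "subs pc, lr, #4", which also restores CPSR from SPSR.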
2637
2638SDValue
2639ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2640 bool isVarArg,
2642 const SmallVectorImpl<SDValue> &OutVals,
2643 const SDLoc &dl, SelectionDAG &DAG) const {
2644 // CCValAssign - represent the assignment of the return value to a location.
 2645 SmallVector<CCValAssign, 16> RVLocs;
 2646
2647 // CCState - Info about the registers and stack slots.
2648 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2649 *DAG.getContext());
2650
2651 // Analyze outgoing return values.
2652 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2653
2654 SDValue Flag;
 2655 SmallVector<SDValue, 4> RetOps;
 2656 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2657 bool isLittleEndian = Subtarget->isLittle();
2658
 2658
 2659 MachineFunction &MF = DAG.getMachineFunction();
 2660 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 2661 AFI->setReturnRegsCount(RVLocs.size());
2662
2663 // Copy the result values into the output registers.
2664 for (unsigned i = 0, realRVLocIdx = 0;
2665 i != RVLocs.size();
2666 ++i, ++realRVLocIdx) {
2667 CCValAssign &VA = RVLocs[i];
2668 assert(VA.isRegLoc() && "Can only return in registers!");
2669
2670 SDValue Arg = OutVals[realRVLocIdx];
2671 bool ReturnF16 = false;
2672
2673 if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
2674 // Half-precision return values can be returned like this:
2675 //
2676 // t11 f16 = fadd ...
2677 // t12: i16 = bitcast t11
2678 // t13: i32 = zero_extend t12
2679 // t14: f32 = bitcast t13 <~~~~~~~ Arg
2680 //
2681 // to avoid code generation for bitcasts, we simply set Arg to the node
2682 // that produces the f16 value, t11 in this case.
2683 //
2684 if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
2685 SDValue ZE = Arg.getOperand(0);
2686 if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
2687 SDValue BC = ZE.getOperand(0);
2688 if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
2689 Arg = BC.getOperand(0);
2690 ReturnF16 = true;
2691 }
2692 }
2693 }
2694 }
2695
2696 switch (VA.getLocInfo()) {
2697 default: llvm_unreachable("Unknown loc info!");
2698 case CCValAssign::Full: break;
2699 case CCValAssign::BCvt:
2700 if (!ReturnF16)
2701 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2702 break;
2703 }
2704
2705 if (VA.needsCustom()) {
2706 if (VA.getLocVT() == MVT::v2f64) {
2707 // Extract the first half and return it in two registers.
 2708 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
 2709 DAG.getConstant(0, dl, MVT::i32));
2710 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2711 DAG.getVTList(MVT::i32, MVT::i32), Half);
2712
2713 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2714 HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2715 Flag);
2716 Flag = Chain.getValue(1);
2717 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2718 VA = RVLocs[++i]; // skip ahead to next loc
2719 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2720 HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2721 Flag);
2722 Flag = Chain.getValue(1);
2723 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2724 VA = RVLocs[++i]; // skip ahead to next loc
2725
2726 // Extract the 2nd half and fall through to handle it as an f64 value.
 2727 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
 2728 DAG.getConstant(1, dl, MVT::i32));
2729 }
2730 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2731 // available.
2732 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
 2733 DAG.getVTList(MVT::i32, MVT::i32), Arg);
 2734 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2735 fmrrd.getValue(isLittleEndian ? 0 : 1),
2736 Flag);
2737 Flag = Chain.getValue(1);
2738 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2739 VA = RVLocs[++i]; // skip ahead to next loc
2740 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2741 fmrrd.getValue(isLittleEndian ? 1 : 0),
2742 Flag);
2743 } else
2744 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2745
 2746 // Guarantee that all emitted copies are stuck together, so that
 2747 // nothing else is scheduled in between them.
2748 Flag = Chain.getValue(1);
2749 RetOps.push_back(DAG.getRegister(VA.getLocReg(),
2750 ReturnF16 ? MVT::f16 : VA.getLocVT()));
2751 }
2752 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2753 const MCPhysReg *I =
2754 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2755 if (I) {
2756 for (; *I; ++I) {
2757 if (ARM::GPRRegClass.contains(*I))
2758 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2759 else if (ARM::DPRRegClass.contains(*I))
 2760 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
 2761 else
2762 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2763 }
2764 }
2765
2766 // Update chain and glue.
2767 RetOps[0] = Chain;
2768 if (Flag.getNode())
2769 RetOps.push_back(Flag);
2770
2771 // CPUs which aren't M-class use a special sequence to return from
2772 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2773 // though we use "subs pc, lr, #N").
2774 //
2775 // M-class CPUs actually use a normal return sequence with a special
2776 // (hardware-provided) value in LR, so the normal code path works.
2777 if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
2778 !Subtarget->isMClass()) {
2779 if (Subtarget->isThumb1Only())
2780 report_fatal_error("interrupt attribute is not supported in Thumb1");
2781 return LowerInterruptReturn(RetOps, dl, DAG);
2782 }
2783
2784 return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2785}
2786
2787bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2788 if (N->getNumValues() != 1)
2789 return false;
2790 if (!N->hasNUsesOfValue(1, 0))
2791 return false;
2792
2793 SDValue TCChain = Chain;
2794 SDNode *Copy = *N->use_begin();
2795 if (Copy->getOpcode() == ISD::CopyToReg) {
2796 // If the copy has a glue operand, we conservatively assume it isn't safe to
2797 // perform a tail call.
2798 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2799 return false;
2800 TCChain = Copy->getOperand(0);
2801 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2802 SDNode *VMov = Copy;
2803 // f64 returned in a pair of GPRs.
 2804 SmallPtrSet<SDNode*, 2> Copies;
 2805 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2806 UI != UE; ++UI) {
2807 if (UI->getOpcode() != ISD::CopyToReg)
2808 return false;
2809 Copies.insert(*UI);
2810 }
2811 if (Copies.size() > 2)
2812 return false;
2813
2814 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2815 UI != UE; ++UI) {
2816 SDValue UseChain = UI->getOperand(0);
2817 if (Copies.count(UseChain.getNode()))
2818 // Second CopyToReg
2819 Copy = *UI;
2820 else {
2821 // We are at the top of this chain.
2822 // If the copy has a glue operand, we conservatively assume it
2823 // isn't safe to perform a tail call.
2824 if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2825 return false;
2826 // First CopyToReg
2827 TCChain = UseChain;
2828 }
2829 }
2830 } else if (Copy->getOpcode() == ISD::BITCAST) {
2831 // f32 returned in a single GPR.
2832 if (!Copy->hasOneUse())
2833 return false;
2834 Copy = *Copy->use_begin();
2835 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2836 return false;
2837 // If the copy has a glue operand, we conservatively assume it isn't safe to
2838 // perform a tail call.
2839 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2840 return false;
2841 TCChain = Copy->getOperand(0);
2842 } else {
2843 return false;
2844 }
2845
2846 bool HasRet = false;
2847 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2848 UI != UE; ++UI) {
2849 if (UI->getOpcode() != ARMISD::RET_FLAG &&
2850 UI->getOpcode() != ARMISD::INTRET_FLAG)
2851 return false;
2852 HasRet = true;
2853 }
2854
2855 if (!HasRet)
2856 return false;
2857
2858 Chain = TCChain;
2859 return true;
2860}
2861
2862bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2863 if (!Subtarget->supportsTailCall())
2864 return false;
2865
2866 auto Attr =
2867 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2868 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2869 return false;
2870
2871 return true;
2872}
2873
 2874// Writing a 64-bit value requires splitting it into two 32-bit values first,
 2875// and then passing the low and high parts through.
 2876static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
 2877 SDLoc DL(Op);
2878 SDValue WriteValue = Op->getOperand(2);
2879
2880 // This function is only supposed to be called for i64 type argument.
2881 assert(WriteValue.getValueType() == MVT::i64
2882 && "LowerWRITE_REGISTER called for non-i64 type argument.");
2883
2884 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2885 DAG.getConstant(0, DL, MVT::i32));
2886 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2887 DAG.getConstant(1, DL, MVT::i32));
2888 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2889 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2890}
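// Illustrative input (the register name is an assumption): a call such as
//   call void @llvm.write_register.i64(metadata !"<reg>", i64 %v)
// reaches this point and is rebuilt as a single WRITE_REGISTER node whose
// value operands are the {Lo, Hi} pair of i32 halves extracted above.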
2891
2892// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2893// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2894// one of the above mentioned nodes. It has to be wrapped because otherwise
2895// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2896// be used to form addressing mode. These wrapped nodes will be selected
2897// into MOVi.
2898SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
2899 SelectionDAG &DAG) const {
2900 EVT PtrVT = Op.getValueType();
2901 // FIXME there is no actual debug info here
2902 SDLoc dl(Op);
2903 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2904 SDValue Res;
2905
2906 // When generating execute-only code Constant Pools must be promoted to the
2907 // global data section. It's a bit ugly that we can't share them across basic
 2908 // blocks, but this way we guarantee that execute-only behaves correctly with
2909 // position-independent addressing modes.
2910 if (Subtarget->genExecuteOnly()) {
2911 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2912 auto T = const_cast<Type*>(CP->getType());
2913 auto C = const_cast<Constant*>(CP->getConstVal());
2914 auto M = const_cast<Module*>(DAG.getMachineFunction().
2915 getFunction().getParent());
2916 auto GV = new GlobalVariable(
2917 *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
 2918 Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
 2919 Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
 2920 Twine(AFI->createPICLabelUId())
2921 );
2922 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
2923 dl, PtrVT);
2924 return LowerGlobalAddress(GA, DAG);
2925 }
2926
2927 if (CP->isMachineConstantPoolEntry())
2928 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2929 CP->getAlignment());
2930 else
2931 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2932 CP->getAlignment());
2933 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2934}
2935
2938}
2939
2940SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2941 SelectionDAG &DAG) const {
 2942 MachineFunction &MF = DAG.getMachineFunction();
 2943 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 2944 unsigned ARMPCLabelIndex = 0;
2945 SDLoc DL(Op);
2946 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2947 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2948 SDValue CPAddr;
2949 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
2950 if (!IsPositionIndependent) {
2951 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2952 } else {
2953 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2954 ARMPCLabelIndex = AFI->createPICLabelUId();
 2955 ARMConstantPoolValue *CPV =
 2956 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2957 ARMCP::CPBlockAddress, PCAdj);
2958 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2959 }
2960 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2961 SDValue Result = DAG.getLoad(
2962 PtrVT, DL, DAG.getEntryNode(), CPAddr,
 2963 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 2964 if (!IsPositionIndependent)
2965 return Result;
2966 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
2967 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2968}
2969
2970/// Convert a TLS address reference into the correct sequence of loads
2971/// and calls to compute the variable's address for Darwin, and return an
2972/// SDValue containing the final node.
2973
2974/// Darwin only has one TLS scheme which must be capable of dealing with the
2975/// fully general situation, in the worst case. This means:
2976/// + "extern __thread" declaration.
2977/// + Defined in a possibly unknown dynamic library.
2978///
2979/// The general system is that each __thread variable has a [3 x i32] descriptor
2980/// which contains information used by the runtime to calculate the address. The
2981/// only part of this the compiler needs to know about is the first word, which
2982/// contains a function pointer that must be called with the address of the
2983/// entire descriptor in "r0".
2984///
2985/// Since this descriptor may be in a different unit, in general access must
2986/// proceed along the usual ARM rules. A common sequence to produce is:
2987///
2988/// movw rT1, :lower16:_var$non_lazy_ptr
2989/// movt rT1, :upper16:_var$non_lazy_ptr
2990/// ldr r0, [rT1]
2991/// ldr rT2, [r0]
2992/// blx rT2
2993/// [...address now in r0...]
2994SDValue
2995ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
2996 SelectionDAG &DAG) const {
2997 assert(Subtarget->isTargetDarwin() &&
2998 "This function expects a Darwin target");
2999 SDLoc DL(Op);
3000
 3001 // First step is to get the address of the actual global symbol. This is where
3002 // the TLS descriptor lives.
3003 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3004
3005 // The first entry in the descriptor is a function pointer that we must call
3006 // to obtain the address of the variable.
3007 SDValue Chain = DAG.getEntryNode();
3008 SDValue FuncTLVGet = DAG.getLoad(
3009 MVT::i32, DL, Chain, DescAddr,
 3010 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
 3011 /* Alignment = */ 4,
 3012 MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
 3013 MachineMemOperand::MOInvariant);
3014 Chain = FuncTLVGet.getValue(1);
3015
 3015
 3016 MachineFunction &F = DAG.getMachineFunction();
 3017 MachineFrameInfo &MFI = F.getFrameInfo();
3018 MFI.setAdjustsStack(true);
3019
3020 // TLS calls preserve all registers except those that absolutely must be
3021 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3022 // silly).
3023 auto TRI =
 3024 getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
 3025 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
 3026 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3027
3028 // Finally, we can make the call. This is just a degenerate version of a
 3029 // normal ARM call node: r0 takes the address of the descriptor, and
3030 // returns the address of the variable in this thread.
3031 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3032 Chain =
 3033 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
 3034 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3035 DAG.getRegisterMask(Mask), Chain.getValue(1));
3036 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3037}
3038
3039SDValue
3040ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3041 SelectionDAG &DAG) const {
3042 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3043
3044 SDValue Chain = DAG.getEntryNode();
3045 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3046 SDLoc DL(Op);
3047
3048 // Load the current TEB (thread environment block)
3049 SDValue Ops[] = {Chain,
3050 DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3051 DAG.getConstant(15, DL, MVT::i32),
3052 DAG.getConstant(0, DL, MVT::i32),
3053 DAG.getConstant(13, DL, MVT::i32),
3054 DAG.getConstant(0, DL, MVT::i32),
3055 DAG.getConstant(2, DL, MVT::i32)};
3056 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3057 DAG.getVTList(MVT::i32, MVT::Other), Ops);
3058
3059 SDValue TEB = CurrentTEB.getValue(0);
3060 Chain = CurrentTEB.getValue(1);
3061
3062 // Load the ThreadLocalStoragePointer from the TEB
3063 // A pointer to the TLS array is located at offset 0x2c from the TEB.
3064 SDValue TLSArray =
3065 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3066 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3067
3068 // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3069 // offset into the TLSArray.
3070
3071 // Load the TLS index from the C runtime
3072 SDValue TLSIndex =
3073 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3074 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3075 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3076
3077 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3078 DAG.getConstant(2, DL, MVT::i32));
3079 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3080 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
 3081 MachinePointerInfo());
 3082
3083 // Get the offset of the start of the .tls section (section base)
3084 const auto *GA = cast<GlobalAddressSDNode>(Op);
3085 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3086 SDValue Offset = DAG.getLoad(
3087 PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3088 DAG.getTargetConstantPool(CPV, PtrVT, 4)),
 3089 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 3090
3091 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3092}
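// The node sequence built above corresponds roughly to (illustrative asm):
//   mrc p15, #0, rT, c13, c0, #2    ; rT = TEB
//   ldr rA, [rT, #0x2c]             ; rA = ThreadLocalStoragePointer
//   ldr rI, _tls_index              ; this module's TLS index
//   ldr rB, [rA, rI, lsl #2]        ; base of the module's TLS block
// followed by adding the variable's SECREL offset, which is loaded from the
// constant pool entry created above.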
3093
3094// Lower ISD::GlobalTLSAddress using the "general dynamic" model
3095SDValue
3096ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3097 SelectionDAG &DAG) const {
3098 SDLoc dl(GA);
3099 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3100 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
 3101 MachineFunction &MF = DAG.getMachineFunction();
 3102 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 3103 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
 3104 ARMConstantPoolValue *CPV =
 3105 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3106 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3107 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
 3108 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
 3109 Argument = DAG.getLoad(
3110 PtrVT, dl, DAG.getEntryNode(), Argument,
 3111 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 3112 SDValue Chain = Argument.getValue(1);
3113
3114 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3115 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3116
3117 // call __tls_get_addr.
 3118 ArgListTy Args;
 3119 ArgListEntry Entry;
3120 Entry.Node = Argument;
3121 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3122 Args.push_back(Entry);
3123
3124 // FIXME: is there useful debug info available here?
 3125 TargetLowering::CallLoweringInfo CLI(DAG);
 3126 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
 3127 CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
 3128 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3129
3130 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3131 return CallResult.first;
3132}
3133
3134// Lower ISD::GlobalTLSAddress using the "initial exec" or
3135// "local exec" model.
3136SDValue
3137ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3138 SelectionDAG &DAG,
3139 TLSModel::Model model) const {
3140 const GlobalValue *GV = GA->getGlobal();
3141 SDLoc dl(GA);
 3142 SDValue Offset;
 3143 SDValue Chain = DAG.getEntryNode();
3144 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3145 // Get the Thread Pointer
 3146 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
 3147
3148 if (model == TLSModel::InitialExec) {
 3149 MachineFunction &MF = DAG.getMachineFunction();
 3150 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 3151 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3152 // Initial exec model.
3153 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
 3154 ARMConstantPoolValue *CPV =
 3155 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
 3156 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
 3157 true);
3158 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3159 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3160 Offset = DAG.getLoad(
3161 PtrVT, dl, Chain, Offset,
 3162 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 3163 Chain = Offset.getValue(1);
3164
3165 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3166 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3167
3168 Offset = DAG.getLoad(
3169 PtrVT, dl, Chain, Offset,
 3170 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 3171 } else {
3172 // local exec model
3173 assert(model == TLSModel::LocalExec);
 3174 ARMConstantPoolValue *CPV =
 3175 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
 3176 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3177 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3178 Offset = DAG.getLoad(
3179 PtrVT, dl, Chain, Offset,
 3180 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 3181 }
3182
3183 // The address of the thread local variable is the add of the thread
3184 // pointer with the offset of the variable.
3185 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3186}
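// Illustrative contrast between the two models handled above: initial-exec
// loads the TP-relative offset indirectly through a GOTTPOFF entry (one extra
// load), while local-exec reads a TPOFF constant straight from the constant
// pool; both end by computing "thread-pointer + offset".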
3187
3188SDValue
3189ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3190 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3191 if (DAG.getTarget().useEmulatedTLS())
3192 return LowerToTLSEmulatedModel(GA, DAG);
3193
3194 if (Subtarget->isTargetDarwin())
3195 return LowerGlobalTLSAddressDarwin(Op, DAG);
3196
3197 if (Subtarget->isTargetWindows())
3198 return LowerGlobalTLSAddressWindows(Op, DAG);
3199
3200 // TODO: implement the "local dynamic" model
3201 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
 3202 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
 3203
3204 switch (model) {
 3205 case TLSModel::GeneralDynamic:
 3206 case TLSModel::LocalDynamic:
 3207 return LowerToTLSGeneralDynamicModel(GA, DAG);
 3208 case TLSModel::InitialExec:
 3209 case TLSModel::LocalExec:
 3210 return LowerToTLSExecModels(GA, DAG, model);
3211 }
3212 llvm_unreachable("bogus TLS model");
3213}
3214
3215/// Return true if all users of V are within function F, looking through
3216/// ConstantExprs.
3217static bool allUsersAreInFunction(const Value *V, const Function *F) {
 3218 SmallVector<const User*,4> Worklist;
 3219 for (auto *U : V->users())
3220 Worklist.push_back(U);
3221 while (!Worklist.empty()) {
3222 auto *U = Worklist.pop_back_val();
3223 if (isa<ConstantExpr>(U)) {
3224 for (auto *UU : U->users())
3225 Worklist.push_back(UU);
3226 continue;
3227 }
3228
3229 auto *I = dyn_cast<Instruction>(U);
3230 if (!I || I->getParent()->getParent() != F)
3231 return false;
3232 }
3233 return true;
3234}
3235
 3236static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
 3237 const GlobalValue *GV, SelectionDAG &DAG,
3238 EVT PtrVT, const SDLoc &dl) {
3239 // If we're creating a pool entry for a constant global with unnamed address,
3240 // and the global is small enough, we can emit it inline into the constant pool
3241 // to save ourselves an indirection.
3242 //
3243 // This is a win if the constant is only used in one function (so it doesn't
3244 // need to be duplicated) or duplicating the constant wouldn't increase code
3245 // size (implying the constant is no larger than 4 bytes).
3246 const Function &F = DAG.getMachineFunction().getFunction();
3247
 3248 // We rely on this decision to inline being idempotent and unrelated to the
3249 // use-site. We know that if we inline a variable at one use site, we'll
3250 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3251 // doesn't know about this optimization, so bail out if it's enabled else
3252 // we could decide to inline here (and thus never emit the GV) but require
3253 // the GV from fast-isel generated code.
 3254 if (!EnableConstpoolPromotion ||
 3255 DAG.getMachineFunction().getTarget().Options.EnableFastISel)
 3256 return SDValue();
3257
3258 auto *GVar = dyn_cast<GlobalVariable>(GV);
3259 if (!GVar || !GVar->hasInitializer() ||
3260 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3261 !GVar->hasLocalLinkage())
3262 return SDValue();
3263
3264 // If we inline a value that contains relocations, we move the relocations
3265 // from .data to .text. This is not allowed in position-independent code.
3266 auto *Init = GVar->getInitializer();
3267 if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3268 Init->needsRelocation())
3269 return SDValue();
3270
3271 // The constant islands pass can only really deal with alignment requests
3272 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3273 // any type wanting greater alignment requirements than 4 bytes. We also
3274 // can only promote constants that are multiples of 4 bytes in size or
 3275 // are paddable to a multiple of 4. Currently we only try to pad constants
3276 // that are strings for simplicity.
3277 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3278 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3279 unsigned Align = DAG.getDataLayout().getPreferredAlignment(GVar);
3280 unsigned RequiredPadding = 4 - (Size % 4);
3281 bool PaddingPossible =
3282 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3283 if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3284 Size == 0)
3285 return SDValue();
3286
3287 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
 3288 MachineFunction &MF = DAG.getMachineFunction();
 3289 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 3290
3291 // We can't bloat the constant pool too much, else the ConstantIslands pass
3292 // may fail to converge. If we haven't promoted this global yet (it may have
3293 // multiple uses), and promoting it would increase the constant pool size (Sz
3294 // > 4), ensure we have space to do so up to MaxTotal.
3295 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3296 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
 3297 ConstpoolPromotionMaxTotal)
 3298 return SDValue();
3299
3300 // This is only valid if all users are in a single function; we can't clone
3301 // the constant in general. The LLVM IR unnamed_addr allows merging
3302 // constants, but not cloning them.
3303 //
3304 // We could potentially allow cloning if we could prove all uses of the
3305 // constant in the current function don't care about the address, like
3306 // printf format strings. But that isn't implemented for now.
3307 if (!allUsersAreInFunction(GVar, &F))
3308 return SDValue();
3309
3310 // We're going to inline this global. Pad it out if needed.
3311 if (RequiredPadding != 4) {
3312 StringRef S = CDAInit->getAsString();
3313
 3314 SmallVector<uint8_t,16> V(S.size());
 3315 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3316 while (RequiredPadding--)
3317 V.push_back(0);
 3318 Init = ConstantDataArray::get(*DAG.getContext(), V);
 3319 }
3320
3321 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3322 SDValue CPAddr =
3323 DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3324 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
 3325 AFI->markGlobalAsPromotedToConstantPool(GVar);
 3326 AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
 3327 PaddedSize - 4);
3328 }
3329 ++NumConstpoolPromoted;
3330 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3331}
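// Padding example (illustrative): a 6-byte string constant has
// RequiredPadding = 2, so two zero bytes are appended and an 8-byte pool
// entry is emitted; the bookkeeping above then records an increase of
// PaddedSize - 4 = 4 bytes against ConstpoolPromotionMaxTotal.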
3332
 3333bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {
 3334 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3335 if (!(GV = GA->getBaseObject()))
3336 return false;
3337 if (const auto *V = dyn_cast<GlobalVariable>(GV))
3338 return V->isConstant();
3339 return isa<Function>(GV);
3340}
3341
3342SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3343 SelectionDAG &DAG) const {
3344 switch (Subtarget->getTargetTriple().getObjectFormat()) {
3345 default: llvm_unreachable("unknown object format");
3346 case Triple::COFF:
3347 return LowerGlobalAddressWindows(Op, DAG);
3348 case Triple::ELF:
3349 return LowerGlobalAddressELF(Op, DAG);
3350 case Triple::MachO:
3351 return LowerGlobalAddressDarwin(Op, DAG);
3352 }
3353}
3354
3355SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3356 SelectionDAG &DAG) const {
3357 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3358 SDLoc dl(Op);
3359 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
 3360 const TargetMachine &TM = getTargetMachine();
 3361 bool IsRO = isReadOnly(GV);
3362
3363 // promoteToConstantPool only if not generating XO text section
3364 if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3365 if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3366 return V;
3367
3368 if (isPositionIndependent()) {
3369 bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3370 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3371 UseGOT_PREL ? ARMII::MO_GOT : 0);
3372 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3373 if (UseGOT_PREL)
3374 Result =
3375 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
 3376 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
 3377 return Result;
3378 } else if (Subtarget->isROPI() && IsRO) {
3379 // PC-relative.
3380 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3381 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3382 return Result;
3383 } else if (Subtarget->isRWPI() && !IsRO) {
3384 // SB-relative.
3385 SDValue RelAddr;
3386 if (Subtarget->useMovt()) {
3387 ++NumMovwMovt;
3388 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3389 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3390 } else { // use literal pool for address constant
 3391 ARMConstantPoolValue *CPV =
 3392 ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
 3393 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3394 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3395 RelAddr = DAG.getLoad(
3396 PtrVT, dl, DAG.getEntryNode(), CPAddr,
 3397 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 3398 }
3399 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3400 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3401 return Result;
3402 }
3403
3404 // If we have T2 ops, we can materialize the address directly via movt/movw
3405 // pair. This is always cheaper.
3406 if (Subtarget->useMovt()) {
3407 ++NumMovwMovt;
3408 // FIXME: Once remat is capable of dealing with instructions with register
3409 // operands, expand this into two nodes.
3410 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3411 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3412 } else {
3413 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3414 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3415 return DAG.getLoad(
3416 PtrVT, dl, DAG.getEntryNode(), CPAddr,
 3417 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 3418 }
3419}
3420
3421SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3422 SelectionDAG &DAG) const {
3423 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3424 "ROPI/RWPI not currently supported for Darwin");
3425 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3426 SDLoc dl(Op);
3427 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3428
3429 if (Subtarget->useMovt())
3430 ++NumMovwMovt;
3431
3432 // FIXME: Once remat is capable of dealing with instructions with register
3433 // operands, expand this into multiple nodes
3434 unsigned Wrapper =
 3435 isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
 3436
3437 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3438 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3439
3440 if (Subtarget->isGVIndirectSymbol(GV))
3441 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
 3442 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
 3443 return Result;
3444}
3445
3446SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3447 SelectionDAG &DAG) const {
3448 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3449 assert(Subtarget->useMovt() &&
3450 "Windows on ARM expects to use movw/movt");
3451 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3452 "ROPI/RWPI not currently supported for Windows");
3453
 3454 const TargetMachine &TM = getTargetMachine();
 3455 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3456 ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
3457 if (GV->hasDLLImportStorageClass())
3458 TargetFlags = ARMII::MO_DLLIMPORT;
3459 else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
3460 TargetFlags = ARMII::MO_COFFSTUB;
3461 EVT PtrVT = getPointerTy(DAG.getDataLayout());
 3462 SDValue Result;
 3463 SDLoc DL(Op);
3464
3465 ++NumMovwMovt;
3466
3467 // FIXME: Once remat is capable of dealing with instructions with register
3468 // operands, expand this into two nodes.
3469 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3470 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
3471 TargetFlags));
3472 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
3473 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
 3474 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
 3475 return Result;
3476}
3477
3478SDValue
3479ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3480 SDLoc dl(Op);
3481 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3482 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3483 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3484 Op.getOperand(1), Val);
3485}
3486
3487SDValue
3488ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3489 SDLoc dl(Op);
3490 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3491 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3492}
3493
3494SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3495 SelectionDAG &DAG) const {
3496 SDLoc dl(Op);
 3497 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
 3498 Op.getOperand(0));
3499}
3500
3501SDValue
3502ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3503 const ARMSubtarget *Subtarget) const {
3504 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3505 SDLoc dl(Op);
3506 switch (IntNo) {
3507 default: return SDValue(); // Don't custom lower most intrinsics.
3508 case Intrinsic::thread_pointer: {
3509 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3510 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3511 }
3512 case Intrinsic::eh_sjlj_lsda: {
 3513 MachineFunction &MF = DAG.getMachineFunction();
 3514 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 3515 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3516 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3517 SDValue CPAddr;
3518 bool IsPositionIndependent = isPositionIndependent();
3519 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
 3520 ARMConstantPoolValue *CPV =
 3521 ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
3522 ARMCP::CPLSDA, PCAdj);
3523 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3524 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3525 SDValue Result = DAG.getLoad(
3526 PtrVT, dl, DAG.getEntryNode(), CPAddr,
 3527 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 3528
3529 if (IsPositionIndependent) {
3530 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3531 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3532 }
3533 return Result;
3534 }
3535 case Intrinsic::arm_neon_vabs:
3536 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3537 Op.getOperand(1));
3538 case Intrinsic::arm_neon_vmulls:
3539 case Intrinsic::arm_neon_vmullu: {
3540 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
 3541 ? ARMISD::VMULLs : ARMISD::VMULLu;
 3542 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3543 Op.getOperand(1), Op.getOperand(2));
3544 }
3545 case Intrinsic::arm_neon_vminnm:
3546 case Intrinsic::arm_neon_vmaxnm: {
3547 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
 3548 ? ISD::FMINNUM : ISD::FMAXNUM;
 3549 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3550 Op.getOperand(1), Op.getOperand(2));
3551 }
3552 case Intrinsic::arm_neon_vminu:
3553 case Intrinsic::arm_neon_vmaxu: {
3554 if (Op.getValueType().isFloatingPoint())
3555 return SDValue();
3556 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3557 ? ISD::UMIN : ISD::UMAX;
3558 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3559 Op.getOperand(1), Op.getOperand(2));
3560 }
3561 case Intrinsic::arm_neon_vmins:
3562 case Intrinsic::arm_neon_vmaxs: {
3563 // v{min,max}s is overloaded between signed integers and floats.
3564 if (!Op.getValueType().isFloatingPoint()) {
3565 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3566 ? ISD::SMIN : ISD::SMAX;
3567 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3568 Op.getOperand(1), Op.getOperand(2));
3569 }
3570 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
 3571 ? ISD::FMINIMUM : ISD::FMAXIMUM;
 3572 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3573 Op.getOperand(1), Op.getOperand(2));
3574 }
3575 case Intrinsic::arm_neon_vtbl1:
3576 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3577 Op.getOperand(1), Op.getOperand(2));
3578 case Intrinsic::arm_neon_vtbl2:
3579 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3580 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3581 }
3582}
3583
3584static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3585 const ARMSubtarget *Subtarget) {
3586 SDLoc dl(Op);
3587 ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
3588 auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
3589 if (SSID == SyncScope::SingleThread)
3590 return Op;
3591
3592 if (!Subtarget->hasDataBarrier()) {
3593 // Some ARMv6 CPUs can support data barriers with an mcr instruction.
3594 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3595 // here.
3596 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3597 "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3598 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3599 DAG.getConstant(0, dl, MVT::i32));
3600 }
3601
3602 ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3603 AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3604 ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3605 if (Subtarget->isMClass()) {
3606 // Only a full system barrier exists in the M-class architectures.
3607 Domain = ARM_MB::SY;
3608 } else if (Subtarget->preferISHSTBarriers() &&
3609 Ord == AtomicOrdering::Release) {
3610 // Swift happens to implement ISHST barriers in a way that's compatible with
3611 // Release semantics but weaker than ISH so we'd be fools not to use
3612 // it. Beware: other processors probably don't!
3613 Domain = ARM_MB::ISHST;
3614 }
3615
3616 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3617 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3618 DAG.getConstant(Domain, dl, MVT::i32));
3619}
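// A rough sketch of the mapping above, with illustrative IR and barriers
// (the exact choice depends on the subtarget checks just performed):
//   fence syncscope("singlethread") seq_cst --> nothing (compiler-only)
//   fence seq_cst   (A-class, default)      --> dmb ish
//   fence release   (Swift, preferISHST)    --> dmb ishst
//   fence seq_cst   (M-class)               --> dmb sy (only full-system domain)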
3620
3621static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3622 const ARMSubtarget *Subtarget) {
3623 // ARM pre v5TE and Thumb1 does not have preload instructions.
3624 if (!(Subtarget->isThumb2() ||
3625 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3626 // Just preserve the chain.
3627 return Op.getOperand(0);
3628
3629 SDLoc dl(Op);
3630 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3631 if (!isRead &&
3632 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3633 // ARMv7 with MP extension has PLDW.
3634 return Op.getOperand(0);
3635
3636 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3637 if (Subtarget->isThumb()) {
3638 // Invert the bits.
3639 isRead = ~isRead & 1;
3640 isData = ~isData & 1;
3641 }
3642
3643 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3644 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3645 DAG.getConstant(isData, dl, MVT::i32));
3646}
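// Illustrative mapping of the legality checks above, using the IR-level
// operand order of @llvm.prefetch (addr, rw, locality, cachetype):
//   @llvm.prefetch(p, /*rw=*/0, ...) --> pld  [p]   (v5TE ARM / Thumb-2)
//   @llvm.prefetch(p, /*rw=*/1, ...) --> pldw [p]   (needs v7 + MP extension)
// When the target cannot encode the preload, the hint is dropped and only
// the chain operand survives.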
3647
3648static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3649 MachineFunction &MF = DAG.getMachineFunction();
3650 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3651
3652 // vastart just stores the address of the VarArgsFrameIndex slot into the
3653 // memory location argument.
3654 SDLoc dl(Op);
3655 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3656 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3657 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3658 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3659 MachinePointerInfo(SV));
3660}
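// In effect "va_start(ap)" becomes a single i32 store: the address of the
// frame index picked by VarArgStyleRegisters is written into the va_list
// slot. The register spilling itself happened earlier, in StoreByValRegs.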
3661
3662SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3663 CCValAssign &NextVA,
3664 SDValue &Root,
3665 SelectionDAG &DAG,
3666 const SDLoc &dl) const {
3667 MachineFunction &MF = DAG.getMachineFunction();
3668 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3669
3670 const TargetRegisterClass *RC;
3671 if (AFI->isThumb1OnlyFunction())
3672 RC = &ARM::tGPRRegClass;
3673 else
3674 RC = &ARM::GPRRegClass;
3675
3676 // Transform the arguments stored in physical registers into virtual ones.
3677 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3678 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3679
3680 SDValue ArgValue2;
3681 if (NextVA.isMemLoc()) {
3682 MachineFrameInfo &MFI = MF.getFrameInfo();
3683 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3684
3685 // Create load node to retrieve arguments from the stack.
3686 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3687 ArgValue2 = DAG.getLoad(
3688 MVT::i32, dl, Root, FIN,
3689 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3690 } else {
3691 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3692 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3693 }
3694 if (!Subtarget->isLittle())
3695 std::swap (ArgValue, ArgValue2);
3696 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3697}
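// Illustrative case (little-endian): an f64 whose first word landed in a GPR
// and whose second word spilled to the stack arrives with VA as a reg loc and
// NextVA as a mem loc. The low half is copied out of the register, the high
// half is loaded from the fixed stack object, and ARMISD::VMOVDRR (roughly
// "vmov dN, rLo, rHi") glues them back together; big-endian swaps the halves.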
3698
3699// The remaining GPRs hold either the beginning of variable-argument
3700// data, or the beginning of an aggregate passed by value (usually
3701// byval). Either way, we allocate stack slots adjacent to the data
3702// provided by our caller, and store the unallocated registers there.
3703// If this is a variadic function, the va_list pointer will begin with
3704// these values; otherwise, this reassembles a (byval) structure that
3705// was split between registers and memory.
3706// Return: The frame index registers were stored into.
3707int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3708 const SDLoc &dl, SDValue &Chain,
3709 const Value *OrigArg,
3710 unsigned InRegsParamRecordIdx,
3711 int ArgOffset, unsigned ArgSize) const {
3712 // Currently, two use-cases are possible:
3713 // Case #1. Non-var-args function, and we meet the first byval parameter.
3714 // Set up the first unallocated register as the first byval register;
3715 // eat all remaining registers
3716 // (these two actions are performed by the HandleByVal method).
3717 // Then, here, we initialize the stack frame with
3718 // "store-reg" instructions.
3719 // Case #2. Var-args function that doesn't contain byval parameters.
3720 // The same: eat all remaining unallocated registers,
3721 // initialize the stack frame.
3722
3723 MachineFunction &MF = DAG.getMachineFunction();
3724 MachineFrameInfo &MFI = MF.getFrameInfo();
3725 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3726 unsigned RBegin, REnd;
3727 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3728 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3729 } else {
3730 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3731 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3732 REnd = ARM::R4;
3733 }
3734
3735 if (REnd != RBegin)
3736 ArgOffset = -4 * (ARM::R4 - RBegin);
3737
3738 auto PtrVT = getPointerTy(DAG.getDataLayout());
3739 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3740 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3741
3742 SmallVector<SDValue, 4> MemOps;
3743 const TargetRegisterClass *RC =
3744 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3745
3746 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3747 unsigned VReg = MF.addLiveIn(Reg, RC);
3748 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3749 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3750 MachinePointerInfo(OrigArg, 4 * i));
3751 MemOps.push_back(Store);
3752 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3753 }
3754
3755 if (!MemOps.empty())
3756 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3757 return FrameIndex;
3758}
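// Hypothetical example: a 16-byte byval that HandleByVal assigned r2..r3
// yields RBegin = R2, REnd = R4, so ArgOffset becomes -4 * (R4 - R2) = -8.
// A 16-byte fixed object is created there, r2/r3 are stored into its first
// 8 bytes, and the caller-pushed remainder already sits in the upper half.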
3759
3760// Set up the stack frame that the va_list pointer will start from.
3761void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3762 const SDLoc &dl, SDValue &Chain,
3763 unsigned ArgOffset,
3764 unsigned TotalArgRegsSaveSize,
3765 bool ForceMutable) const {
3766 MachineFunction &MF = DAG.getMachineFunction();
3767 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3768
3769 // Try to store any remaining integer argument regs
3770 // to their spots on the stack so that they may be loaded by dereferencing
3771 // the result of va_next.
3772 // If there are no regs to be stored, just point the address after the last
3773 // argument passed via the stack.
3774 int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3775 CCInfo.getInRegsParamsCount(),
3776 CCInfo.getNextStackOffset(),
3777 std::max(4U, TotalArgRegsSaveSize));
3778 AFI->setVarArgsFrameIndex(FrameIndex);
3779}
3780
3781SDValue ARMTargetLowering::LowerFormalArguments(
3782 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3783 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3784 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3785 MachineFunction &MF = DAG.getMachineFunction();
3786 MachineFrameInfo &MFI = MF.getFrameInfo();
3787
3788 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3789
3790 // Assign locations to all of the incoming arguments.
3791 SmallVector<CCValAssign, 16> ArgLocs;
3792 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3793 *DAG.getContext());
3794 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3795
3796 SmallVector<SDValue, 16> ArgValues;
3797 SDValue ArgValue;
3798 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
3799 unsigned CurArgIdx = 0;
3800
3801 // Initially ArgRegsSaveSize is zero.
3802 // Then we increase this value each time we meet a byval parameter.
3803 // We also increase this value in the case of a varargs function.
3804 AFI->setArgRegsSaveSize(0);
3805
3806 // Calculate the amount of stack space that we need to allocate to store
3807 // byval and variadic arguments that are passed in registers.
3808 // We need to know this before we allocate the first byval or variadic
3809 // argument, as they will be allocated a stack slot below the CFA (Canonical
3810 // Frame Address, the stack pointer at entry to the function).
3811 unsigned ArgRegBegin = ARM::R4;
3812 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3813 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3814 break;
3815
3816 CCValAssign &VA = ArgLocs[i];
3817 unsigned Index = VA.getValNo();
3818 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3819 if (!Flags.isByVal())
3820 continue;
3821
3822 assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3823 unsigned RBegin, REnd;
3824 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3825 ArgRegBegin = std::min(ArgRegBegin, RBegin);
3826
3827 CCInfo.nextInRegsParam();
3828 }
3829 CCInfo.rewindByValRegsInfo();
3830
3831 int lastInsIndex = -1;
3832 if (isVarArg && MFI.hasVAStart()) {
3833 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3834 if (RegIdx != array_lengthof(GPRArgRegs))
3835 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3836 }
3837
3838 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3839 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3840 auto PtrVT = getPointerTy(DAG.getDataLayout());
3841
3842 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3843 CCValAssign &VA = ArgLocs[i];
3844 if (Ins[VA.getValNo()].isOrigArg()) {
3845 std::advance(CurOrigArg,
3846 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3847 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3848 }
3849 // Arguments stored in registers.
3850 if (VA.isRegLoc()) {
3851 EVT RegVT = VA.getLocVT();
3852
3853 if (VA.needsCustom()) {
3854 // f64 and vector types are split up into multiple registers or
3855 // combinations of registers and stack slots.
3856 if (VA.getLocVT() == MVT::v2f64) {
3857 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3858 Chain, DAG, dl);
3859 VA = ArgLocs[++i]; // skip ahead to next loc
3860 SDValue ArgValue2;
3861 if (VA.isMemLoc()) {
3862 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3863 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3864 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3865 MachinePointerInfo::getFixedStack(
3866 DAG.getMachineFunction(), FI));
3867 } else {
3868 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3869 Chain, DAG, dl);
3870 }
3871 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3872 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3873 ArgValue, ArgValue1,
3874 DAG.getIntPtrConstant(0, dl));
3875 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3876 ArgValue, ArgValue2,
3877 DAG.getIntPtrConstant(1, dl));
3878 } else
3879 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3880 } else {
3881 const TargetRegisterClass *RC;
3882
3883
3884 if (RegVT == MVT::f16)
3885 RC = &ARM::HPRRegClass;
3886 else if (RegVT == MVT::f32)
3887 RC = &ARM::SPRRegClass;
3888 else if (RegVT == MVT::f64 || RegVT == MVT::v4f16)
3889 RC = &ARM::DPRRegClass;
3890 else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16)
3891 RC = &ARM::QPRRegClass;
3892 else if (RegVT == MVT::i32)
3893 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3894 : &ARM::GPRRegClass;
3895 else
3896 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3897
3898 // Transform the arguments in physical registers into virtual ones.
3899 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3900 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3901 }
3902
3903 // If this is an 8 or 16-bit value, it is really passed promoted
3904 // to 32 bits. Insert an assert[sz]ext to capture this, then
3905 // truncate to the right size.
3906 switch (VA.getLocInfo()) {
3907 default: llvm_unreachable("Unknown loc info!");
3908 case CCValAssign::Full: break;
3909 case CCValAssign::BCvt:
3910 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3911 break;
3912 case CCValAssign::SExt:
3913 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3914 DAG.getValueType(VA.getValVT()));
3915 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3916 break;
3917 case CCValAssign::ZExt:
3918 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3919 DAG.getValueType(VA.getValVT()));
3920 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3921 break;
3922 }
3923
3924 InVals.push_back(ArgValue);
3925 } else { // VA.isRegLoc()
3926 // sanity check
3927 assert(VA.isMemLoc());
3928 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3929
3930 int index = VA.getValNo();
3931
3932 // Some Ins[] entries become multiple ArgLoc[] entries.
3933 // Process them only once.
3934 if (index != lastInsIndex)
3935 {
3936 ISD::ArgFlagsTy Flags = Ins[index].Flags;
3937 // FIXME: For now, all byval parameter objects are marked mutable.
3938 // This can be changed with more analysis.
3939 // In case of tail call optimization mark all arguments mutable.
3940 // Since they could be overwritten by lowering of arguments in case of
3941 // a tail call.
3942 if (Flags.isByVal()) {
3943 assert(Ins[index].isOrigArg() &&
3944 "Byval arguments cannot be implicit");
3945 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3946
3947 int FrameIndex = StoreByValRegs(
3948 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
3949 VA.getLocMemOffset(), Flags.getByValSize());
3950 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
3951 CCInfo.nextInRegsParam();
3952 } else {
3953 unsigned FIOffset = VA.getLocMemOffset();
3954 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3955 FIOffset, true);
3956
3957 // Create load nodes to retrieve arguments from the stack.
3958 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3959 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3960 MachinePointerInfo::getFixedStack(
3961 DAG.getMachineFunction(), FI)));
3962 }
3963 lastInsIndex = index;
3964 }
3965 }
3966 }
3967
3968 // varargs
3969 if (isVarArg && MFI.hasVAStart())
3970 VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3971 CCInfo.getNextStackOffset(),
3972 TotalArgRegsSaveSize);
3973
3974 AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3975
3976 return Chain;
3977}
3978
3979/// isFloatingPointZero - Return true if this is +0.0.
3980static bool isFloatingPointZero(SDValue Op) {
3981 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3982 return CFP->getValueAPF().isPosZero();
3983 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3984 // Maybe this has already been legalized into the constant pool?
3985 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3986 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3987 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3988 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3989 return CFP->getValueAPF().isPosZero();
3990 }
3991 } else if (Op->getOpcode() == ISD::BITCAST &&
3992 Op->getValueType(0) == MVT::f64) {
3993 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3994 // created by LowerConstantFP().
3995 SDValue BitcastOp = Op->getOperand(0);
3996 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
3997 isNullConstant(BitcastOp->getOperand(0)))
3998 return true;
3999 }
4000 return false;
4001}
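// Note the deliberate asymmetry: only +0.0 is recognized. A compare against
// -0.0 therefore keeps the full CMPFP form instead of the cheaper
// compare-against-zero (CMPFPw0) that getVFPCmp emits when this returns true.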
4002
4003/// Returns an appropriate ARM CMP (cmp) and corresponding condition code for
4004/// the given operands.
4005SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4006 SDValue &ARMcc, SelectionDAG &DAG,
4007 const SDLoc &dl) const {
4008 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4009 unsigned C = RHSC->getZExtValue();
4010 if (!isLegalICmpImmediate((int32_t)C)) {
4011 // Constant does not fit, try adjusting it by one.
4012 switch (CC) {
4013 default: break;
4014 case ISD::SETLT:
4015 case ISD::SETGE:
4016 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4017 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
4018 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4019 }
4020 break;
4021 case ISD::SETULT:
4022 case ISD::SETUGE:
4023 if (C != 0 && isLegalICmpImmediate(C-1)) {
4024 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
4025 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4026 }
4027 break;
4028 case ISD::SETLE:
4029 case ISD::SETGT:
4030 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
4031 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
4032 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4033 }
4034 break;
4035 case ISD::SETULE:
4036 case ISD::SETUGT:
4037 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
4038 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4039 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4040 }
4041 break;
4042 }
4043 }
4044 } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
4045 (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
4046 // In ARM and Thumb-2, the compare instructions can shift their second
4047 // operand.
4048 CC = ISD::getSetCCSwappedOperands(CC);
4049 std::swap(LHS, RHS);
4050 }
4051
4052 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4053
4054 // If the RHS is a constant zero then the V (overflow) flag will never be
4055 // set. This can allow us to simplify GE to PL or LT to MI, which can be
4056 // simpler for other passes (like the peephole optimiser) to deal with.
4057 if (isNullConstant(RHS)) {
4058 switch (CondCode) {
4059 default: break;
4060 case ARMCC::GE:
4061 CondCode = ARMCC::PL;
4062 break;
4063 case ARMCC::LT:
4064 CondCode = ARMCC::MI;
4065 break;
4066 }
4067 }
4068
4069 ARMISD::NodeType CompareType;
4070 switch (CondCode) {
4071 default:
4072 CompareType = ARMISD::CMP;
4073 break;
4074 case ARMCC::EQ:
4075 case ARMCC::NE:
4076 // Uses only Z Flag
4077 CompareType = ARMISD::CMPZ;
4078 break;
4079 }
4080 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4081 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
4082}
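// Example of the immediate adjustment performed above: 257 is not a valid
// modified immediate but 256 (0x100) is, so "x < 257" (SETLT) is rewritten
// as "x <= 256" (SETLE), allowing roughly "cmp r0, #256" instead of first
// materializing 257 in a register.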
4083
4084/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4085SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
4086 SelectionDAG &DAG, const SDLoc &dl,
4087 bool InvalidOnQNaN) const {
4088 assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4089 SDValue Cmp;
4090 SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
4091 if (!isFloatingPointZero(RHS))
4092 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
4093 else
4094 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
4095 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
4096}
4097
4098/// duplicateCmp - Glue values can have only one use, so this function
4099/// duplicates a comparison node.
4100SDValue
4101ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
4102 unsigned Opc = Cmp.getOpcode();
4103 SDLoc DL(Cmp);
4104 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
4105 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4106
4107 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
4108 Cmp = Cmp.getOperand(0);
4109 Opc = Cmp.getOpcode();
4110 if (Opc == ARMISD::CMPFP)
4111 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
4112 Cmp.getOperand(1), Cmp.getOperand(2));
4113 else {
4114 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
4115 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
4116 Cmp.getOperand(1));
4117 }
4118 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
4119}
4120
4121// This function returns three things: the arithmetic computation itself
4122// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
4123// comparison and the condition code define the case in which the arithmetic
4124// computation *does not* overflow.
4125std::pair<SDValue, SDValue>
4126ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
4127 SDValue &ARMcc) const {
4128 assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
4129
4130 SDValue Value, OverflowCmp;
4131 SDValue LHS = Op.getOperand(0);
4132 SDValue RHS = Op.getOperand(1);
4133 SDLoc dl(Op);
4134
4135 // FIXME: We are currently always generating CMPs because we don't support
4136 // generating CMN through the backend. This is not as good as the natural
4137 // CMP case because it causes a register dependency and cannot be folded
4138 // later.
4139
4140 switch (Op.getOpcode()) {
4141 default:
4142 llvm_unreachable("Unknown overflow instruction!");
4143 case ISD::SADDO:
4144 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4145 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
4146 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4147 break;
4148 case ISD::UADDO:
4149 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4150 // We use ADDC here to correspond to its use in LowerUnsignedALUO.
4151 // We do not use it in the USUBO case as Value may not be used.
4152 Value = DAG.getNode(ARMISD::ADDC, dl,
4153 DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
4154 .getValue(0);
4155 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4156 break;
4157 case ISD::SSUBO:
4158 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4159 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4160 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4161 break;
4162 case ISD::USUBO:
4163 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4164 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4165 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4166 break;
4167 case ISD::UMULO:
4168 // We generate a UMUL_LOHI and then check if the high word is 0.
4169 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4170 Value = DAG.getNode(ISD::UMUL_LOHI, dl,
4171 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4172 LHS, RHS);
4173 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4174 DAG.getConstant(0, dl, MVT::i32));
4175 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4176 break;
4177 case ISD::SMULO:
4178 // We generate a SMUL_LOHI and then check if all the bits of the high word
4179 // are the same as the sign bit of the low word.
4180 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4181 Value = DAG.getNode(ISD::SMUL_LOHI, dl,
4182 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4183 LHS, RHS);
4184 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4185 DAG.getNode(ISD::SRA, dl, Op.getValueType(),
4186 Value.getValue(0),
4187 DAG.getConstant(31, dl, MVT::i32)));
4188 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4189 break;
4190 } // switch (...)
4191
4192 return std::make_pair(Value, OverflowCmp);
4193}
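// E.g. (sadd.with.overflow x, y) comes out of this function as
//   Value       = add x, y
//   OverflowCmp = cmp Value, x
//   ARMcc       = VC
// i.e. the addition did not overflow exactly when that compare leaves the
// V flag clear.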
4194
4195SDValue
4196ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4197 // Let legalize expand this if it isn't a legal type yet.
4198 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4199 return SDValue();
4200
4201 SDValue Value, OverflowCmp;
4202 SDValue ARMcc;
4203 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
4204 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4205 SDLoc dl(Op);
4206 // We use 0 and 1 as false and true values.
4207 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
4208 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
4209 EVT VT = Op.getValueType();
4210
4211 SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
4212 ARMcc, CCR, OverflowCmp);
4213
4214 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
4215 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4216}
4217
4218static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
4219 SelectionDAG &DAG) {
4220 SDLoc DL(BoolCarry);
4221 EVT CarryVT = BoolCarry.getValueType();
4222
4223 // This converts the boolean value carry into the carry flag by doing
4224 // ARMISD::SUBC Carry, 1
4225 SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
4226 DAG.getVTList(CarryVT, MVT::i32),
4227 BoolCarry, DAG.getConstant(1, DL, CarryVT));
4228 return Carry.getValue(1);
4229}
4230
4231static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
4232 SelectionDAG &DAG) {
4233 SDLoc DL(Flags);
4234
4235 // Now convert the carry flag into a boolean carry. We do this
4236 // using ARMISD:ADDE 0, 0, Carry
4237 return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
4238 DAG.getConstant(0, DL, MVT::i32),
4239 DAG.getConstant(0, DL, MVT::i32), Flags);
4240}
4241
4242SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
4243 SelectionDAG &DAG) const {
4244 // Let legalize expand this if it isn't a legal type yet.
4245 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4246 return SDValue();
4247
4248 SDValue LHS = Op.getOperand(0);
4249 SDValue RHS = Op.getOperand(1);
4250 SDLoc dl(Op);
4251
4252 EVT VT = Op.getValueType();
4253 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4254 SDValue Value;
4255 SDValue Overflow;
4256 switch (Op.getOpcode()) {
4257 default:
4258 llvm_unreachable("Unknown overflow instruction!");
4259 case ISD::UADDO:
4260 Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
4261 // Convert the carry flag into a boolean value.
4262 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4263 break;
4264 case ISD::USUBO: {
4265 Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
4266 // Convert the carry flag into a boolean value.
4267 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4268 // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
4269 // value. So compute 1 - C.
4270 Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
4271 DAG.getConstant(1, dl, MVT::i32), Overflow);
4272 break;
4273 }
4274 }
4275
4276 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4277}
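// Worked example for usub.with.overflow: ARM's carry after SUBC is a
// "no borrow" flag (C = 1 means no borrow occurred), so once ADDE has
// extracted it as a 0/1 value, the "1 - C" above flips it into the borrow
// (overflow) bit that LLVM's usubo semantics expect.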
4278
4279SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
4280 SDValue Cond = Op.getOperand(0);
4281 SDValue SelectTrue = Op.getOperand(1);
4282 SDValue SelectFalse = Op.getOperand(2);
4283 SDLoc dl(Op);
4284 unsigned Opc = Cond.getOpcode();
4285
4286 if (Cond.getResNo() == 1 &&
4287 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
4288 Opc == ISD::USUBO)) {
4289 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
4290 return SDValue();
4291
4292 SDValue Value, OverflowCmp;
4293 SDValue ARMcc;
4294 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
4295 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4296 EVT VT = Op.getValueType();
4297
4298 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
4299 OverflowCmp, DAG);
4300 }
4301
4302 // Convert:
4303 //
4304 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
4305 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
4306 //
4307 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
4308 const ConstantSDNode *CMOVTrue =
4309 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
4310 const ConstantSDNode *CMOVFalse =
4311 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
4312
4313 if (CMOVTrue && CMOVFalse) {
4314 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
4315 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
4316
4317 SDValue True;
4318 SDValue False;
4319 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
4320 True = SelectTrue;
4321 False = SelectFalse;
4322 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
4323 True = SelectFalse;
4324 False = SelectTrue;
4325 }
4326
4327 if (True.getNode() && False.getNode()) {
4328 EVT VT = Op.getValueType();
4329 SDValue ARMcc = Cond.getOperand(2);
4330 SDValue CCR = Cond.getOperand(3);
4331 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
4332 assert(True.getValueType() == VT);
4333 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
4334 }
4335 }
4336 }
4337
4338 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
4339 // undefined bits before doing a full-word comparison with zero.
4340 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
4341 DAG.getConstant(1, dl, Cond.getValueType()));
4342
4343 return DAG.getSelectCC(dl, Cond,
4344 DAG.getConstant(0, dl, Cond.getValueType()),
4345 SelectTrue, SelectFalse, ISD::SETNE);
4346}
4347
4348static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
4349 bool &swpCmpOps, bool &swpVselOps) {
4350 // Start by selecting the GE condition code for opcodes that return true for
4351 // 'equality'
4352 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
4353 CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
4354 CondCode = ARMCC::GE;
4355
4356 // and GT for opcodes that return false for 'equality'.
4357 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
4358 CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
4359 CondCode = ARMCC::GT;
4360
4361 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
4362 // to swap the compare operands.
4363 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
4364 CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
4365 swpCmpOps = true;
4366
4367 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
4368 // If we have an unordered opcode, we need to swap the operands to the VSEL
4369 // instruction (effectively negating the condition).
4370 //
4371 // This also has the effect of swapping which one of 'less' or 'greater'
4372 // returns true, so we also swap the compare operands. It also switches
4373 // whether we return true for 'equality', so we compensate by picking the
4374 // opposite condition code to our original choice.
4375 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
4376 CC == ISD::SETUGT) {
4377 swpCmpOps = !swpCmpOps;
4378 swpVselOps = !swpVselOps;
4379 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
4380 }
4381
4382 // 'ordered' is 'anything but unordered', so use the VS condition code and
4383 // swap the VSEL operands.
4384 if (CC == ISD::SETO) {
4385 CondCode = ARMCC::VS;
4386 swpVselOps = true;
4387 }
4388
4389 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
4390 // code and swap the VSEL operands. Also do this if we don't care about the
4391 // unordered case.
4392 if (CC == ISD::SETUNE || CC == ISD::SETNE) {
4393 CondCode = ARMCC::EQ;
4394 swpVselOps = true;
4395 }
4396}
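// Concrete example: for (setolt a, b) ? x : y this picks GT and sets
// swpCmpOps, producing roughly
//   vcmp b, a ; vselgt x, y
// so "a < b" is realized as "b > a", since VSEL has no LT encoding.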
4397
4398SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
4399 SDValue TrueVal, SDValue ARMcc, SDValue CCR,
4400 SDValue Cmp, SelectionDAG &DAG) const {
4401 if (!Subtarget->hasFP64() && VT == MVT::f64) {
4402 FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4403 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
4404 TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4405 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
4406
4407 SDValue TrueLow = TrueVal.getValue(0);
4408 SDValue TrueHigh = TrueVal.getValue(1);
4409 SDValue FalseLow = FalseVal.getValue(0);
4410 SDValue FalseHigh = FalseVal.getValue(1);
4411
4412 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
4413 ARMcc, CCR, Cmp);
4414 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
4415 ARMcc, CCR, duplicateCmp(Cmp, DAG));
4416
4417 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
4418 } else {
4419 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
4420 Cmp);
4421 }
4422}
4423
4424static bool isGTorGE(ISD::CondCode CC) {
4425 return CC == ISD::SETGT || CC == ISD::SETGE;
4426}
4427
4428static bool isLTorLE(ISD::CondCode CC) {
4429 return CC == ISD::SETLT || CC == ISD::SETLE;
4430}
4431
4432// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
4433// All of these conditions (and their <= and >= counterparts) will do:
4434// x < k ? k : x
4435// x > k ? x : k
4436// k < x ? x : k
4437// k > x ? k : x
4438static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
4439 const SDValue TrueVal, const SDValue FalseVal,
4440 const ISD::CondCode CC, const SDValue K) {
4441 return (isGTorGE(CC) &&
4442 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
4443 (isLTorLE(CC) &&
4444 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
4445}
4446
4447// Similar to isLowerSaturate(), but checks for upper-saturating conditions.
4448static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
4449 const SDValue TrueVal, const SDValue FalseVal,
4450 const ISD::CondCode CC, const SDValue K) {
4451 return (isGTorGE(CC) &&
4452 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) ||
4453 (isLTorLE(CC) &&
4454 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
4455}
4456
4457// Check if two chained conditionals could be converted into SSAT or USAT.
4458//
4459// SSAT can replace a set of two conditional selectors that bound a number to an
4460// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
4461//
4462// x < -k ? -k : (x > k ? k : x)
4463// x < -k ? -k : (x < k ? x : k)
4464// x > -k ? (x > k ? k : x) : -k
4465// x < k ? (x < -k ? -k : x) : k
4466// etc.
4467//
4468// USAT works similarly to SSAT but bounds the value to the interval [0, k], where k + 1 is
4469// a power of 2.
4470//
4471// It returns true if the conversion can be done, false otherwise.
4472// Additionally, the variable is returned in parameter V, the constant in K,
4473// and usat is set to true if the conditional represents an unsigned saturation.
4474static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
4475 uint64_t &K, bool &usat) {
4476 SDValue LHS1 = Op.getOperand(0);
4477 SDValue RHS1 = Op.getOperand(1);
4478 SDValue TrueVal1 = Op.getOperand(2);
4479 SDValue FalseVal1 = Op.getOperand(3);
4480 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4481
4482 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
4483 if (Op2.getOpcode() != ISD::SELECT_CC)
4484 return false;
4485
4486 SDValue LHS2 = Op2.getOperand(0);
4487 SDValue RHS2 = Op2.getOperand(1);
4488 SDValue TrueVal2 = Op2.getOperand(2);
4489 SDValue FalseVal2 = Op2.getOperand(3);
4490 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
4491
4492 // Find out which are the constants and which are the variables
4493 // in each conditional
4494 SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
4495 ? &RHS1
4496 : nullptr;
4497 SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
4498 ? &RHS2
4499 : nullptr;
4500 SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
4501 SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
4502 SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
4503 SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2;
4504
4505 // We must detect cases where the original operations worked with 16- or
4506 // 8-bit values. In such a case, V2Tmp != V2 because the comparison operations
4507 // must work with sign-extended values but the select operations return
4508 // the original non-extended value.
4509 SDValue V2TmpReg = V2Tmp;
4510 if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG)
4511 V2TmpReg = V2Tmp->getOperand(0);
4512
4513 // Check that the registers and the constants have the correct values
4514 // in both conditionals
4515 if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp ||
4516 V2TmpReg != V2)
4517 return false;
4518
4519 // Figure out which conditional is saturating the lower/upper bound.
4520 const SDValue *LowerCheckOp =
4521 isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4522 ? &Op
4523 : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4524 ? &Op2
4525 : nullptr;
4526 const SDValue *UpperCheckOp =
4527 isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4528 ? &Op
4529 : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4530 ? &Op2
4531 : nullptr;
4532
4533 if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
4534 return false;
4535
4536 // Check that the constant in the lower-bound check is
4537 // the opposite of the constant in the upper-bound check
4538 // in 1's complement.
4539 int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
4540 int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
4541 int64_t PosVal = std::max(Val1, Val2);
4542 int64_t NegVal = std::min(Val1, Val2);
4543
4544 if (((Val1 > Val2 && UpperCheckOp == &Op) ||
4545 (Val1 < Val2 && UpperCheckOp == &Op2)) &&
4546 isPowerOf2_64(PosVal + 1)) {
4547
4548 // Handle the difference between USAT (unsigned) and SSAT (signed) saturation
4549 if (Val1 == ~Val2)
4550 usat = false;
4551 else if (NegVal == 0)
4552 usat = true;
4553 else
4554 return false;
4555
4556 V = V2;
4557 K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
4558
4559 return true;
4560 }
4561
4562 return false;
4563}
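// Example with k = 127 (k + 1 = 128, a power of 2):
//   x < -128 ? -128 : (x > 127 ? 127 : x)
// gives Val1/Val2 = {127, -128} with 127 == ~(-128), hence usat = false, and
// LowerSELECT_CC can collapse the pair into roughly "ssat r0, #8, r1".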
4564
4565// Check if a condition of the type x < k ? k : x can be converted into a
4566// bit operation instead of conditional moves.
4567// Currently this is allowed given:
4568// - The conditions and values match up
4569// - k is 0 or -1 (all ones)
4570// This function will not check the last condition; that's up to the caller.
4571// It returns true if the transformation can be made, and in such a case
4572// returns x in V, and k in SatK.
4573static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
4574 SDValue &SatK)
4575{
4576 SDValue LHS = Op.getOperand(0);
4577 SDValue RHS = Op.getOperand(1);
4578 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4579 SDValue TrueVal = Op.getOperand(2);
4580 SDValue FalseVal = Op.getOperand(3);
4581
4582 SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
4583 ? &RHS
4584 : nullptr;
4585
4586 // No constant operation in comparison, early out
4587 if (!K)
4588 return false;
4589
4590 SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
4591 V = (KTmp == TrueVal) ? FalseVal : TrueVal;
4592 SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
4593
4594 // If the constant on left and right side, or variable on left and right,
4595 // does not match, early out
4596 if (*K != KTmp || V != VTmp)
4597 return false;
4598
4599 if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
4600 SatK = *K;
4601 return true;
4602 }
4603
4604 return false;
4605}
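// Combined with the k == 0 case in LowerSELECT_CC below, this turns
//   x < 0 ? 0 : x
// into roughly "bic r0, r1, r1, asr #31": the arithmetic shift broadcasts
// the sign bit, so the AND with its complement yields max(x, 0) with no
// conditional move at all.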
4606
4607bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
4608 if (VT == MVT::f32)
4609 return !Subtarget->hasVFP2Base();
4610 if (VT == MVT::f64)
4611 return !Subtarget->hasFP64();
4612 if (VT == MVT::f16)
4613 return !Subtarget->hasFullFP16();
4614 return false;
4615}
4616
4617SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
4618 EVT VT = Op.getValueType();
4619 SDLoc dl(Op);
4620
4621 // Try to convert two saturating conditional selects into a single SSAT
4622 SDValue SatValue;
4623 uint64_t SatConstant;
4624 bool SatUSat;
4625 if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
4626 isSaturatingConditional(Op, SatValue, SatConstant, SatUSat)) {
4627 if (SatUSat)
4628 return DAG.getNode(ARMISD::USAT, dl, VT, SatValue,
4629 DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
4630 else
4631 return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
4632 DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
4633 }
4634
4635 // Try to convert expressions of the form x < k ? k : x (and similar forms)
4636 // into more efficient bit operations, which is possible when k is 0 or -1.
4637 // On ARM and Thumb-2, which have a flexible operand 2, this will result in
4638 // single instructions. On Thumb the shift and the bit operation will be two
4639 // instructions.
4640 // Only allow this transformation on full-width (32-bit) operations
4641 SDValue LowerSatConstant;
4642 if (VT == MVT::i32 &&
4643 isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
4644 SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
4645 DAG.getConstant(31, dl, VT));
4646 if (isNullConstant(LowerSatConstant)) {
4647 SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
4648 DAG.getAllOnesConstant(dl, VT));
4649 return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
4650 } else if (isAllOnesConstant(LowerSatConstant))
4651 return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
4652 }
4653
4654 SDValue LHS = Op.getOperand(0);
4655 SDValue RHS = Op.getOperand(1);
4656 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4657 SDValue TrueVal = Op.getOperand(2);
4658 SDValue FalseVal = Op.getOperand(3);
4659
4660 if (isUnsupportedFloatingType(LHS.getValueType())) {
4661 DAG.getTargetLoweringInfo().softenSetCCOperands(
4662 DAG, LHS.getValueType(), LHS, RHS, CC, dl);
4663
4664 // If softenSetCCOperands only returned one value, we should compare it to
4665 // zero.
4666 if (!RHS.getNode()) {
4667 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4668 CC = ISD::SETNE;
4669 }
4670 }
4671
4672 if (LHS.getValueType() == MVT::i32) {
4673 // Try to generate VSEL on ARMv8.
4674 // The VSEL instruction can't use all the usual ARM condition
4675 // codes: it only has two bits to select the condition code, so it's
4676 // constrained to use only GE, GT, VS and EQ.
4677 //
4678 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
4679 // swap the operands of the previous compare instruction (effectively
4680 // inverting the compare condition, swapping 'less' and 'greater') and
4681 // sometimes need to swap the operands to the VSEL (which inverts the
4682 // condition in the sense of firing whenever the previous condition didn't)
4683 if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
4684 TrueVal.getValueType() == MVT::f32 ||
4685 TrueVal.getValueType() == MVT::f64)) {
4686 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4687 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
4688 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
4689 CC = ISD::getSetCCInverse(CC, true);
4690 std::swap(TrueVal, FalseVal);
4691 }
4692 }
4693
4694 SDValue ARMcc;
4695 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4696 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4697 // Choose GE over PL, which vsel does not support
4698 if (cast<ConstantSDNode>(ARMcc)->getZExtValue() == ARMCC::PL)
4699 ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
4700 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4701 }
4702
4703 ARMCC::CondCodes CondCode, CondCode2;
4704 bool InvalidOnQNaN;
4705 FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
4706
4707 // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
4708 // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
4709 // must use VSEL (limited condition codes), due to not having conditional f16
4710 // moves.
4711 if (Subtarget->hasFPARMv8Base() &&
4712 !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
4713 (TrueVal.getValueType() == MVT::f16 ||
4714 TrueVal.getValueType() == MVT::f32 ||
4715 TrueVal.getValueType() == MVT::f64)) {
4716 bool swpCmpOps = false;
4717 bool swpVselOps = false;
4718 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
4719
4720 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
4721 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
4722 if (swpCmpOps)
4723 std::swap(LHS, RHS);
4724 if (swpVselOps)
4725 std::swap(TrueVal, FalseVal);
4726 }
4727 }
4728
4729 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4730 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4731 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4732 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4733 if (CondCode2 != ARMCC::AL) {
4734 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
4735 // FIXME: Needs another CMP because flag can have but one use.
4736 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4737 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
4738 }
4739 return Result;
4740}
4741
4742/// canChangeToInt - Given the fp compare operand, return true if it is suitable
4743/// to morph to an integer compare sequence.
4744static bool canChangeToInt(SDValue Op, bool &SeenZero,
4745 const ARMSubtarget *Subtarget) {
4746 SDNode *N = Op.getNode();
4747 if (!N->hasOneUse())
4748 // Otherwise it requires moving the value from fp to integer registers.
4749 return false;
4750 if (!N->getNumValues())
4751 return false;
4752 EVT VT = Op.getValueType();
4753 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
4754 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
4755 // vmrs are very slow, e.g. cortex-a8.
4756 return false;
4757
4758 if (isFloatingPointZero(Op)) {
4759 SeenZero = true;
4760 return true;
4761 }
4762 return ISD::isNormalLoad(N);
4763}
4764
4765static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
4766 if (isFloatingPointZero(Op))
4767 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
4768
4769 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
4770 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
4771 Ld->getPointerInfo(), Ld->getAlignment(),
4772 Ld->getMemOperand()->getFlags());
4773
4774 llvm_unreachable("Unknown VFP cmp argument!");
4775}
4776
4777static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
4778 SDValue &RetVal1, SDValue &RetVal2) {
4779 SDLoc dl(Op);
4780
4781 if (isFloatingPointZero(Op)) {
4782 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
4783 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
4784 return;
4785 }
4786
4787 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
4788 SDValue Ptr = Ld->getBasePtr();
4789 RetVal1 =
4790 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
4791 Ld->getAlignment(), Ld->getMemOperand()->getFlags());
4792
4793 EVT PtrType = Ptr.getValueType();
4794 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
4795 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
4796 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
4797 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
4798 Ld->getPointerInfo().getWithOffset(4), NewAlign,
4799 Ld->getMemOperand()->getFlags());
4800 return;
4801 }
4802
4803 llvm_unreachable("Unknown VFP cmp argument!");
4804}
4805
4806/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
4807/// f32 and even f64 comparisons to integer ones.
4808SDValue
4809ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
4810 SDValue Chain = Op.getOperand(0);
4811 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4812 SDValue LHS = Op.getOperand(2);
4813 SDValue RHS = Op.getOperand(3);
4814 SDValue Dest = Op.getOperand(4);
4815 SDLoc dl(Op);
4816
4817 bool LHSSeenZero = false;
4818 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
4819 bool RHSSeenZero = false;
4820 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
4821 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
4822 // If unsafe fp math optimization is enabled and there are no other uses of
4823 // the CMP operands, and the condition code is EQ or NE, we can optimize it
4824 // to an integer comparison.
4825 if (CC == ISD::SETOEQ)
4826 CC = ISD::SETEQ;
4827 else if (CC == ISD::SETUNE)
4828 CC = ISD::SETNE;
4829
4830 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4831 SDValue ARMcc;
4832 if (LHS.getValueType() == MVT::f32) {
4833 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4834 bitcastf32Toi32(LHS, DAG), Mask);
4835 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4836 bitcastf32Toi32(RHS, DAG), Mask);
4837 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4838 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4839 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4840 Chain, Dest, ARMcc, CCR, Cmp);
4841 }
4842
4843 SDValue LHS1, LHS2;
4844 SDValue RHS1, RHS2;
4845 expandf64Toi32(LHS, DAG, LHS1, LHS2);
4846 expandf64Toi32(RHS, DAG, RHS1, RHS2);
4847 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
4848 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
4849 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4850 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4851 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4852 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
4853 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
4854 }
4855
4856 return SDValue();
4857}
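// Sketch of the f32 path above: with unsafe-fp-math, "br (a oeq b)" where one
// side is a known +0.0 becomes an integer compare of the bit patterns with
// the sign bit masked off (& 0x7fffffff), so +0.0 and -0.0 still compare
// equal while the VFP compare + FMSTAT round trip is avoided entirely.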
4858
4859SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
4860 SDValue Chain = Op.getOperand(0);
4861 SDValue Cond = Op.getOperand(1);
4862 SDValue Dest = Op.getOperand(2);
4863 SDLoc dl(Op);
4864
4865 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
4866 // instruction.
4867 unsigned Opc = Cond.getOpcode();
4868 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
4869 !Subtarget->isThumb1Only();
4870 if (Cond.getResNo() == 1 &&
4871 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
4872 Opc == ISD::USUBO || OptimizeMul)) {
4873 // Only lower legal XALUO ops.
4874 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
4875 return SDValue();
4876
4877 // The actual operation with overflow check.
4878 SDValue Value, OverflowCmp;
4879 SDValue ARMcc;
4880 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
4881
4882 // Reverse the condition code.
4883 ARMCC::CondCodes CondCode =
4884 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
4885 CondCode = ARMCC::getOppositeCondition(CondCode);
4886 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
4887 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4888
4889 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
4890 OverflowCmp);
4891 }
4892
4893 return SDValue();
4894}
4895
4896SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
4897 SDValue Chain = Op.getOperand(0);
4898 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4899 SDValue LHS = Op.getOperand(2);
4900 SDValue RHS = Op.getOperand(3);
4901 SDValue Dest = Op.getOperand(4);
4902 SDLoc dl(Op);
4903
4904 if (isUnsupportedFloatingType(LHS.getValueType())) {
4905 DAG.getTargetLoweringInfo().softenSetCCOperands(
4906 DAG, LHS.getValueType(), LHS, RHS, CC, dl);
4907
4908 // If softenSetCCOperands only returned one value, we should compare it to
4909 // zero.
4910 if (!RHS.getNode()) {
4911 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4912 CC = ISD::SETNE;
4913 }
4914 }
4915
4916 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
4917 // instruction.
4918 unsigned Opc = LHS.getOpcode();
4919 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
4920 !Subtarget->isThumb1Only();
4921 if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
4922 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
4923 Opc == ISD::USUBO || OptimizeMul) &&
4924 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
4925 // Only lower legal XALUO ops.
4926 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
4927 return SDValue();
4928
4929 // The actual operation with overflow check.
4930 SDValue Value, OverflowCmp;
4931 SDValue ARMcc;
4932 std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
4933
4934 if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
4935 // Reverse the condition code.
4936 ARMCC::CondCodes CondCode =
4937 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
4938 CondCode = ARMCC::getOppositeCondition(CondCode);
4939 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
4940 }
4941 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4942
4943 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
4944 OverflowCmp);
4945 }
4946
4947 if (LHS.getValueType() == MVT::i32) {
4948 SDValue ARMcc;
4949 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4950 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4951 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4952 Chain, Dest, ARMcc, CCR, Cmp);
4953 }
4954
4955 if (getTargetMachine().Options.UnsafeFPMath &&
4956 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
4957 CC == ISD::SETNE || CC == ISD::SETUNE)) {
4958 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
4959 return Result;
4960 }
4961
4962 ARMCC::CondCodes CondCode, CondCode2;
4963 bool InvalidOnQNaN;
4964 FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
4965
4966 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4967 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4968 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4969 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4970 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
4971 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4972 if (CondCode2 != ARMCC::AL) {
4973 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
4974 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
4975 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4976 }
4977 return Res;
4978}
4979
4980SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
4981 SDValue Chain = Op.getOperand(0);
4982 SDValue Table = Op.getOperand(1);
4983 SDValue Index = Op.getOperand(2);
4984 SDLoc dl(Op);
4985
4986 EVT PTy = getPointerTy(DAG.getDataLayout());
4987 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
4988 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
4989 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
4990 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
4991 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
4992 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
4993 // Thumb2 and ARMv8-M use a two-level jump. That is, the code jumps into the jump
4994 // table, which does another jump to the destination. This also makes it easier
4995 // to translate it to TBB / TBH later (Thumb2 only).
4996 // FIXME: This might not work if the function is extremely large.
4997 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
4998 Addr, Op.getOperand(2), JTI);
4999 }
5000 if (isPositionIndependent() || Subtarget->isROPI()) {
5001 Addr =
5002 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
5003 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5004 Chain = Addr.getValue(1);
5005 Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
5006 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5007 } else {
5008 Addr =
5009 DAG.getLoad(PTy, dl, Chain, Addr,
5010 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5011 Chain = Addr.getValue(1);
5012 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5013 }
5014}
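// Summarizing the three shapes produced above: Thumb-2/v8-M branch into the
// table itself (BR2_JT, later turned into TBB/TBH where possible); PIC/ROPI
// tables hold offsets, hence the load plus "add Addr, Table"; plain ARM
// tables hold absolute addresses that are loaded and branched to (BR_JT).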
5015
5016static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
5017 EVT VT = Op.getValueType();
5018 SDLoc dl(Op);
5019
5020 if (Op.getValueType().getVectorElementType() == MVT::i32) {
5021 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5022 return Op;
5023 return DAG.UnrollVectorOp(Op.getNode());
5024 }
5025
5026 const bool HasFullFP16 =
5027 static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
5028
5029 EVT NewTy;
5030 const EVT OpTy = Op.getOperand(0).getValueType();
5031 if (OpTy == MVT::v4f32)
5032 NewTy = MVT::v4i32;
5033 else if (OpTy == MVT::v4f16 && HasFullFP16)
5034 NewTy = MVT::v4i16;
5035 else if (OpTy == MVT::v8f16 && HasFullFP16)
5036 NewTy = MVT::v8i16;
5037 else
5038 llvm_unreachable("Invalid type for custom lowering!");
5039
5040 if (VT != MVT::v4i16 && VT != MVT::v8i16)
5041 return DAG.UnrollVectorOp(Op.getNode());
5042
5043 Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
5044 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
5045}
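// E.g. "fptosi <4 x float> %v to <4 x i16>" takes the NewTy = v4i32 path:
// a single v4f32 -> v4i32 vcvt followed by a truncate (typically a vmovn),
// rather than being scalarized into four separate conversions.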
5046
5047SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
5048 EVT VT = Op.getValueType();
5049 if (VT.isVector())
5050 return LowerVectorFP_TO_INT(Op, DAG);
5051 if (isUnsupportedFloatingType(Op.getOperand(0).getValueType())) {
5052 RTLIB::Libcall LC;
5053 if (Op.getOpcode() == ISD::FP_TO_SINT)
5054 LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
5055 Op.getValueType());
5056 else
5057 LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
5058 Op.getValueType());
5059 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
5060 /*isSigned*/ false, SDLoc(Op)).first;
5061 }
5062
5063 return Op;
5064}
5065
5066static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
5067 EVT VT = Op.getValueType();
5068 SDLoc dl(Op);
5069
5070 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
5071 if (VT.getVectorElementType() == MVT::f32)
5072 return Op;
5073 return DAG.UnrollVectorOp(Op.getNode());
5074 }
5075
5076 assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
5077 Op.getOperand(0).getValueType() == MVT::v8i16) &&
5078 "Invalid type for custom lowering!");
5079
5080 const bool HasFullFP16 =
5081 static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
5082
5083 EVT DestVecType;
5084 if (VT == MVT::v4f32)
5085 DestVecType = MVT::v4i32;
5086 else if (VT == MVT::v4f16 && HasFullFP16)
5087 DestVecType = MVT::v4i16;
5088 else if (VT == MVT::v8f16 && HasFullFP16)
5089 DestVecType = MVT::v8i16;
5090 else
5091 return DAG.UnrollVectorOp(Op.getNode());
5092
5093 unsigned CastOpc;
5094 unsigned Opc;
5095 switch (Op.getOpcode()) {
5096 default: llvm_unreachable("Invalid opcode!");
5097 case ISD::SINT_TO_FP:
5098 CastOpc = ISD::SIGN_EXTEND;
5099 Opc = ISD::SINT_TO_FP;
5100 break;
5101 case ISD::UINT_TO_FP:
5102 CastOpc = ISD::ZERO_EXTEND;
5103 Opc = ISD::UINT_TO_FP;
5104 break;
5105 }
5106
5107 Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
5108 return DAG.getNode(Opc, dl, VT, Op);
5109}
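// For illustration: without full fp16, "v4f32 = sint_to_fp v4i16 x" is
// widened first and then converted, roughly:
//   t1: v4i32 = sign_extend x     ; e.g. VMOVL.S16
//   t2: v4f32 = sint_to_fp t1     ; e.g. VCVT.F32.S32
// (mnemonics are indicative only; actual selection happens later).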
5110
5111SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
5112 EVT VT = Op.getValueType();
5113 if (VT.isVector())
5114 return LowerVectorINT_TO_FP(Op, DAG);
5115 if (isUnsupportedFloatingType(VT)) {
5116 RTLIB::Libcall LC;
5117 if (Op.getOpcode() == ISD::SINT_TO_FP)
5118 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
5119 Op.getValueType());
5120 else
5121 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
5122 Op.getValueType());
5123 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
5124 /*isSigned*/ false, SDLoc(Op)).first;
5125 }
5126
5127 return Op;
5128}
5129
5130SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
5131 // Implement fcopysign by copying the sign bit of operand 1 into operand 0.
5132 SDValue Tmp0 = Op.getOperand(0);
5133 SDValue Tmp1 = Op.getOperand(1);
5134 SDLoc dl(Op);
5135 EVT VT = Op.getValueType();
5136 EVT SrcVT = Tmp1.getValueType();
5137 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
5138 Tmp0.getOpcode() == ARMISD::VMOVDRR;
5139 bool UseNEON = !InGPR && Subtarget->hasNEON();
5140
5141 if (UseNEON) {
5142 // Use VBSL to copy the sign bit.
5143 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
5144 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
5145 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
5146 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
5147 if (VT == MVT::f64)
5148 Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
5149 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
5150 DAG.getConstant(32, dl, MVT::i32));
5151 else /*if (VT == MVT::f32)*/
5152 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
5153 if (SrcVT == MVT::f32) {
5154 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
5155 if (VT == MVT::f64)
5156 Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
5157 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
5158 DAG.getConstant(32, dl, MVT::i32));
5159 } else if (VT == MVT::f32)
5160 Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
5161 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
5162 DAG.getConstant(32, dl, MVT::i32));
5163 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
5164 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
5165
5166 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
5167 dl, MVT::i32);
5168 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
5169 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
5170 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
5171
5172 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
5173 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
5174 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
5175 if (VT == MVT::f32) {
5176 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
5177 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
5178 DAG.getConstant(0, dl, MVT::i32));
5179 } else {
5180 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
5181 }
5182
5183 return Res;
5184 }
5185
5186 // Bitcast operand 1 to i32.
5187 if (SrcVT == MVT::f64)
5188 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
5189 Tmp1).getValue(1);
5190 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
5191
5192 // Or in the signbit with integer operations.
5193 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
5194 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5195 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
5196 if (VT == MVT::f32) {
5197 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
5198 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
5199 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
5200 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
5201 }
5202
5203 // f64: Or the high part with signbit and then combine two parts.
5204 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
5205 Tmp0);
5206 SDValue Lo = Tmp0.getValue(0);
5207 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
5208 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
5209 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
5210}
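// Worked example of the integer path above for f32 operands:
//   sign = bits(Tmp1) & 0x80000000   // sign bit of operand 1
//   mag  = bits(Tmp0) & 0x7fffffff   // magnitude of operand 0
//   result = bitcast<f32>(mag | sign)
// The f64 case applies the same masks to the high word only and rebuilds the
// double with VMOVDRR.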
5211
5212SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
5213 MachineFunction &MF = DAG.getMachineFunction();
5214 MachineFrameInfo &MFI = MF.getFrameInfo();
5215 MFI.setReturnAddressIsTaken(true);
5216
5217 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
5218 return SDValue();
5219
5220 EVT VT = Op.getValueType();
5221 SDLoc dl(Op);
5222 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5223 if (Depth) {
5224 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
5225 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
5226 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
5227 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
5228 MachinePointerInfo());
5229 }
5230
5231 // Return LR, which contains the return address. Mark it an implicit live-in.
5232 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
5233 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
5234}
5235
5236SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
5237 const ARMBaseRegisterInfo &ARI =
5238 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
5239 MachineFunction &MF = DAG.getMachineFunction();
5240 MachineFrameInfo &MFI = MF.getFrameInfo();
5241 MFI.setFrameAddressIsTaken(true);
5242
5243 EVT VT = Op.getValueType();
5244 SDLoc dl(Op); // FIXME probably not meaningful
5245 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5246 unsigned FrameReg = ARI.getFrameRegister(MF);
5247 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
5248 while (Depth--)
5249 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
5250 MachinePointerInfo());
5251 return FrameAddr;
5252}
5253
5254// FIXME? Maybe this could be a TableGen attribute on some registers and
5255// this table could be generated automatically from RegInfo.
5256unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
5257 SelectionDAG &DAG) const {
5258 unsigned Reg = StringSwitch<unsigned>(RegName)
5259 .Case("sp", ARM::SP)
5260 .Default(0);
5261 if (Reg)
5262 return Reg;
5263 report_fatal_error(Twine("Invalid register name \""
5264 + StringRef(RegName) + "\"."));
5265}
5266
5267 // The result is a 64-bit value, so split it into two 32-bit values and
5268 // return them as a pair of values.
5269 static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
5270 SelectionDAG &DAG) {
5271 SDLoc DL(N);
5272
5273 // This function is only supposed to be called for i64 type destination.
5274 assert(N->getValueType(0) == MVT::i64
5275 && "ExpandREAD_REGISTER called for non-i64 type result.");
5276
5277 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
5278 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
5279 N->getOperand(0),
5280 N->getOperand(1));
5281
5282 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
5283 Read.getValue(1)));
5284 Results.push_back(Read.getOperand(0));
5285}
5286
5287/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
5288/// When \p DstVT, the destination type of \p BC, is on the vector
5289/// register bank and the source of bitcast, \p Op, operates on the same bank,
5290/// it might be possible to combine them, such that everything stays on the
5291/// vector register bank.
5292 /// \return The node that would replace \p BC, if the combine
5293 /// is possible.
5294 static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
5295 SelectionDAG &DAG) {
5296 SDValue Op = BC->getOperand(0);
5297 EVT DstVT = BC->getValueType(0);
5298
5299 // The only vector instruction that can produce a scalar (remember,
5300 // since the bitcast was about to be turned into VMOVDRR, the source
5301 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
5302 // Moreover, we can do this combine only if there is one use.
5303 // Finally, if the destination type is not a vector, there is not
5304 // much point on forcing everything on the vector bank.
5305 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
5306 !Op.hasOneUse())
5307 return SDValue();
5308
5309 // If the index is not constant, we will introduce an additional
5310 // multiply that will stick.
5311 // Give up in that case.
5312 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
5313 if (!Index)
5314 return SDValue();
5315 unsigned DstNumElt = DstVT.getVectorNumElements();
5316
5317 // Compute the new index.
5318 const APInt &APIntIndex = Index->getAPIntValue();
5319 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
5320 NewIndex *= APIntIndex;
5321 // Check if the new constant index fits into i32.
5322 if (NewIndex.getBitWidth() > 32)
5323 return SDValue();
5324
5325 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
5326 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
5327 SDLoc dl(Op);
5328 SDValue ExtractSrc = Op.getOperand(0);
5329 EVT VecVT = EVT::getVectorVT(
5330 *DAG.getContext(), DstVT.getScalarType(),
5331 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
5332 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
5333 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
5334 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
5335}
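// e.g. "v2f32 = bitcast (i64 extractelt v2i64 %src, 1)" becomes
//   v2f32 = extract_subvector (v4f32 bitcast %src), 2
// where the new index is the old index (1) scaled by DstNumElt (2), so the
// value never leaves the vector register bank.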
5336
5337/// ExpandBITCAST - If the target supports VFP, this function is called to
5338/// expand a bit convert where either the source or destination type is i64 to
5339/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
5340/// operand type is illegal (e.g., v2f32 for a target that doesn't support
5341/// vectors), since the legalizer won't know what to do with that.
5342 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
5343 const ARMSubtarget *Subtarget) {
5344 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5345 SDLoc dl(N);
5346 SDValue Op = N->getOperand(0);
5347
5348 // This function is only supposed to be called for i64 types, either as the
5349 // source or destination of the bit convert.
5350 EVT SrcVT = Op.getValueType();
5351 EVT DstVT = N->getValueType(0);
5352 const bool HasFullFP16 = Subtarget->hasFullFP16();
5353
5354 if (SrcVT == MVT::f32 && DstVT == MVT::i32) {
5355 // FullFP16: half values are passed in S-registers, and we don't
5356 // need any of the bitcast and moves:
5357 //
5358 // t2: f32,ch = CopyFromReg t0, Register:f32 %0
5359 // t5: i32 = bitcast t2
5360 // t18: f16 = ARMISD::VMOVhr t5
5361 if (Op.getOpcode() != ISD::CopyFromReg ||
5362 Op.getValueType() != MVT::f32)
5363 return SDValue();
5364
5365 auto Move = N->use_begin();
5366 if (Move->getOpcode() != ARMISD::VMOVhr)
5367 return SDValue();
5368
5369 SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
5370 SDValue Copy = DAG.getNode(ISD::CopyFromReg, SDLoc(Op), MVT::f16, Ops);
5371 DAG.ReplaceAllUsesWith(*Move, &Copy);
5372 return Copy;
5373 }
5374
5375 if (SrcVT == MVT::i16 && DstVT == MVT::f16) {
5376 if (!HasFullFP16)
5377 return SDValue();
5378 // SoftFP: read half-precision arguments:
5379 //
5380 // t2: i32,ch = ...
5381 // t7: i16 = truncate t2 <~~~~ Op
5382 // t8: f16 = bitcast t7 <~~~~ N
5383 //
5384 if (Op.getOperand(0).getValueType() == MVT::i32)
5385 return DAG.getNode(ARMISD::VMOVhr, SDLoc(Op),
5386 MVT::f16, Op.getOperand(0));
5387
5388 return SDValue();
5389 }
5390
5391 // Half-precision return values
5392 if (SrcVT == MVT::f16 && DstVT == MVT::i16) {
5393 if (!HasFullFP16)
5394 return SDValue();
5395 //
5396 // t11: f16 = fadd t8, t10
5397 // t12: i16 = bitcast t11 <~~~ SDNode N
5398 // t13: i32 = zero_extend t12
5399 // t16: ch,glue = CopyToReg t0, Register:i32 %r0, t13
5400 // t17: ch = ARMISD::RET_FLAG t16, Register:i32 %r0, t16:1
5401 //
5402 // transform this into:
5403 //
5404 // t20: i32 = ARMISD::VMOVrh t11
5405 // t16: ch,glue = CopyToReg t0, Register:i32 %r0, t20
5406 //
5407 auto ZeroExtend = N->use_begin();
5408 if (N->use_size() != 1 || ZeroExtend->getOpcode() != ISD::ZERO_EXTEND ||
5409 ZeroExtend->getValueType(0) != MVT::i32)
5410 return SDValue();
5411
5412 auto Copy = ZeroExtend->use_begin();
5413 if (Copy->getOpcode() == ISD::CopyToReg &&
5414 Copy->use_begin()->getOpcode() == ARMISD::RET_FLAG) {
5415 SDValue Cvt = DAG.getNode(ARMISD::VMOVrh, SDLoc(Op), MVT::i32, Op);
5416 DAG.ReplaceAllUsesWith(*ZeroExtend, &Cvt);
5417 return Cvt;
5418 }
5419 return SDValue();
5420 }
5421
5422 if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
5423 return SDValue();
5424
5425 // Turn i64->f64 into VMOVDRR.
5426 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
5427 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
5428 // if we can combine the bitcast with its source.
5429 if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
5430 return Val;
5431
5432 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
5433 DAG.getConstant(0, dl, MVT::i32));
5434 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
5435 DAG.getConstant(1, dl, MVT::i32));
5436 return DAG.getNode(ISD::BITCAST, dl, DstVT,
5437 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
5438 }
5439
5440 // Turn f64->i64 into VMOVRRD.
5441 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
5442 SDValue Cvt;
5443 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
5444 SrcVT.getVectorNumElements() > 1)
5445 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
5446 DAG.getVTList(MVT::i32, MVT::i32),
5447 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
5448 else
5449 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
5450 DAG.getVTList(MVT::i32, MVT::i32), Op);
5451 // Merge the pieces into a single i64 value.
5452 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
5453 }
5454
5455 return SDValue();
5456}
5457
5458/// getZeroVector - Returns a vector of specified type with all zero elements.
5459/// Zero vectors are used to represent vector negation and in those cases
5460/// will be implemented with the NEON VNEG instruction. However, VNEG does
5461/// not support i64 elements, so sometimes the zero vectors will need to be
5462/// explicitly constructed. Regardless, use a canonical VMOV to create the
5463/// zero vector.
5464static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5465 assert(VT.isVector() && "Expected a vector type");
5466 // The canonical modified immediate encoding of a zero vector is....0!
5467 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
5468 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
5469 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
5470 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
5471}
5472
5473 /// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
5474 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
5475SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
5476 SelectionDAG &DAG) const {
5477 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5478 EVT VT = Op.getValueType();
5479 unsigned VTBits = VT.getSizeInBits();
5480 SDLoc dl(Op);
5481 SDValue ShOpLo = Op.getOperand(0);
5482 SDValue ShOpHi = Op.getOperand(1);
5483 SDValue ShAmt = Op.getOperand(2);
5484 SDValue ARMcc;
5485 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5486 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
5487
5488 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
5489
5490 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
5491 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
5492 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
5493 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
5494 DAG.getConstant(VTBits, dl, MVT::i32));
5495 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
5496 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
5497 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
5498 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
5499 ISD::SETGE, ARMcc, DAG, dl);
5500 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
5501 ARMcc, CCR, CmpLo);
5502
5503 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
5504 SDValue HiBigShift = Opc == ISD::SRA
5505 ? DAG.getNode(Opc, dl, VT, ShOpHi,
5506 DAG.getConstant(VTBits - 1, dl, VT))
5507 : DAG.getConstant(0, dl, VT);
5508 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
5509 ISD::SETGE, ARMcc, DAG, dl);
5510 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
5511 ARMcc, CCR, CmpHi);
5512
5513 SDValue Ops[2] = { Lo, Hi };
5514 return DAG.getMergeValues(Ops, dl);
5515}
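// Worked example of the lowering above for a 64-bit arithmetic shift right
// by n, where each half is 32 bits:
//   if n < 32:  Lo = (Lo >>u n) | (Hi << (32 - n));  Hi = Hi >>s n
//   if n >= 32: Lo = Hi >>s (n - 32);                Hi = Hi >>s 31
// Both candidates are computed and a CMOV on (n - 32) >= 0 selects between
// them, avoiding any branches.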
5516
5517 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
5518 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
5519SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
5520 SelectionDAG &DAG) const {
5521 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5522 EVT VT = Op.getValueType();
5523 unsigned VTBits = VT.getSizeInBits();
5524 SDLoc dl(Op);
5525 SDValue ShOpLo = Op.getOperand(0);
5526 SDValue ShOpHi = Op.getOperand(1);
5527 SDValue ShAmt = Op.getOperand(2);
5528 SDValue ARMcc;
5529 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5530
5531 assert(Op.getOpcode() == ISD::SHL_PARTS);
5532 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
5533 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
5534 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
5535 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
5536 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
5537
5538 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
5539 DAG.getConstant(VTBits, dl, MVT::i32));
5540 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
5541 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
5542 ISD::SETGE, ARMcc, DAG, dl);
5543 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
5544 ARMcc, CCR, CmpHi);
5545
5546 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
5547 ISD::SETGE, ARMcc, DAG, dl);
5548 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
5549 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
5550 DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
5551
5552 SDValue Ops[2] = { Lo, Hi };
5553 return DAG.getMergeValues(Ops, dl);
5554}
5555
5556SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
5557 SelectionDAG &DAG) const {
5558 // The rounding mode is in bits 23:22 of the FPSCR.
5559 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
5560 // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
5561 // so that the shift + and get folded into a bitfield extract.
5562 SDLoc dl(Op);
5563 SDValue Ops[] = { DAG.getEntryNode(),
5564 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) };
5565
5566 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops);
5567 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
5568 DAG.getConstant(1U << 22, dl, MVT::i32));
5569 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
5570 DAG.getConstant(22, dl, MVT::i32));
5571 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
5572 DAG.getConstant(3, dl, MVT::i32));
5573}
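// For example, if FPSCR[23:22] is 0b11 (round towards zero), then
//   ((FPSCR + (1 << 22)) >> 22) & 3 == (3 + 1) & 3 == 0,
// which is the FLT_ROUNDS encoding of round-towards-zero; any carry out of
// bit 23 is discarded by the final mask.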
5574
5575 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
5576 const ARMSubtarget *ST) {
5577 SDLoc dl(N);
5578 EVT VT = N->getValueType(0);
5579 if (VT.isVector()) {
5580 assert(ST->hasNEON());
5581
5582 // Compute the least significant set bit: LSB = X & -X
5583 SDValue X = N->getOperand(0);
5584 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
5585 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
5586
5587 EVT ElemTy = VT.getVectorElementType();
5588
5589 if (ElemTy == MVT::i8) {
5590 // Compute with: cttz(x) = ctpop(lsb - 1)
5591 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5592 DAG.getTargetConstant(1, dl, ElemTy));
5593 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
5594 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
5595 }
5596
5597 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
5598 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
5599 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
5600 unsigned NumBits = ElemTy.getSizeInBits();
5601 SDValue WidthMinus1 =
5602 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5603 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
5604 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
5605 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
5606 }
5607
5608 // Compute with: cttz(x) = ctpop(lsb - 1)
5609
5610 // Compute LSB - 1.
5611 SDValue Bits;
5612 if (ElemTy == MVT::i64) {
5613 // Load constant 0xffff'ffff'ffff'ffff to register.
5614 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5615 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
5616 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
5617 } else {
5618 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5619 DAG.getTargetConstant(1, dl, ElemTy));
5620 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
5621 }
5622 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
5623 }
5624
5625 if (!ST->hasV6T2Ops())
5626 return SDValue();
5627
5628 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
5629 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
5630}
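// Worked example of cttz(x) = ctpop(lsb - 1) used above, for x = 0b01100:
//   -x           = ...10100
//   lsb = x & -x =   0b00100
//   lsb - 1      =   0b00011
//   ctpop        = 2 == cttz(0b01100)
// When x == 0, lsb - 1 is all-ones and ctpop returns the full bit width.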
5631
5632 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
5633 const ARMSubtarget *ST) {
5634 EVT VT = N->getValueType(0);
5635 SDLoc DL(N);
5636
5637 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
5638 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
5639 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
5640 "Unexpected type for custom ctpop lowering");
5641
5642 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5643 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
5644 SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
5645 Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
5646
5647 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
5648 unsigned EltSize = 8;
5649 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
5650 while (EltSize != VT.getScalarSizeInBits()) {
5651 SmallVector<SDValue, 8> Ops;
5652 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
5653 TLI.getPointerTy(DAG.getDataLayout())));
5654 Ops.push_back(Res);
5655
5656 EltSize *= 2;
5657 NumElts /= 2;
5658 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
5659 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
5660 }
5661
5662 return Res;
5663}
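// e.g. a v2i32 CTPOP is computed above as (mnemonics indicative only):
//   t1: v8i8  = ctpop (bitcast x)   ; VCNT.8
//   t2: v4i16 = vpaddlu t1          ; VPADDL.U8, 8 -> 16 bit elements
//   t3: v2i32 = vpaddlu t2          ; VPADDL.U16, 16 -> 32 bit elements
// Each pairwise add doubles the element size until it matches the result type.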
5664
5665 /// getVShiftImm - Check if this is a valid build_vector for the immediate
5666/// operand of a vector shift operation, where all the elements of the
5667/// build_vector must have the same constant integer value.
5668static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
5669 // Ignore bit_converts.
5670 while (Op.getOpcode() == ISD::BITCAST)
5671 Op = Op.getOperand(0);
5672 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
5673 APInt SplatBits, SplatUndef;
5674 unsigned SplatBitSize;
5675 bool HasAnyUndefs;
5676 if (!BVN ||
5677 !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
5678 ElementBits) ||
5679 SplatBitSize > ElementBits)
5680 return false;
5681 Cnt = SplatBits.getSExtValue();
5682 return true;
5683}
5684
5685/// isVShiftLImm - Check if this is a valid build_vector for the immediate
5686/// operand of a vector shift left operation. That value must be in the range:
5687/// 0 <= Value < ElementBits for a left shift; or
5688/// 0 <= Value <= ElementBits for a long left shift.
5689static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
5690 assert(VT.isVector() && "vector shift count is not a vector type");
5691 int64_t ElementBits = VT.getScalarSizeInBits();
5692 if (!getVShiftImm(Op, ElementBits, Cnt))
5693 return false;
5694 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
5695}
5696
5697/// isVShiftRImm - Check if this is a valid build_vector for the immediate
5698/// operand of a vector shift right operation. For a shift opcode, the value
5699 /// is positive, but for an intrinsic the count must be negative. The
5700/// absolute value must be in the range:
5701/// 1 <= |Value| <= ElementBits for a right shift; or
5702/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
5703static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
5704 int64_t &Cnt) {
5705 assert(VT.isVector() && "vector shift count is not a vector type");
5706 int64_t ElementBits = VT.getScalarSizeInBits();
5707 if (!getVShiftImm(Op, ElementBits, Cnt))
5708 return false;
5709 if (!isIntrinsic)
5710 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
5711 if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
5712 Cnt = -Cnt;
5713 return true;
5714 }
5715 return false;
5716}
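// For example, with v8i16 operands (ElementBits == 16) the checks above
// accept left-shift immediates 0..15 (0..16 for the long form) and
// right-shift immediates 1..16 (1..8 for narrowing shifts); intrinsics
// encode right shifts as negated counts, so -5 denotes a shift right by 5.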
5717
5718 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
5719 const ARMSubtarget *ST) {
5720 EVT VT = N->getValueType(0);
5721 SDLoc dl(N);
5722 int64_t Cnt;
5723
5724 if (!VT.isVector())
5725 return SDValue();
5726
5727 // We essentially have two forms here: shift by an immediate and shift by a
5728 // vector register (there is also a shift by a GPR, but that is just handled
5729 // with a tablegen pattern). We cannot easily match shift by an immediate in
5730 // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
5731 // For shifting by a vector, we don't have VSHR, only VSHL (which can be
5732 // signed or unsigned, and a negative shift indicates a shift right).
5733 if (N->getOpcode() == ISD::SHL) {
5734 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
5735 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
5736 DAG.getConstant(Cnt, dl, MVT::i32));
5737 return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
5738 N->getOperand(1));
5739 }
5740
5741 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
5742 "unexpected vector shift opcode");
5743
5744 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
5745 unsigned VShiftOpc =
5746 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
5747 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
5748 DAG.getConstant(Cnt, dl, MVT::i32));
5749 }
5750
5751 // For other right shifts we don't have dedicated operations, so use a shift
5752 // left by a negated count instead.
5753 EVT ShiftVT = N->getOperand(1).getValueType();
5754 SDValue NegatedCount = DAG.getNode(
5755 ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
5756 unsigned VShiftOpc =
5757 (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
5758 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
5759}
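// e.g. a right shift by a non-constant vector count is emitted above as
//   VSHLu(x, 0 - y)
// because NEON only shifts left by a register operand; negative per-lane
// counts shift right.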
5760
5761 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
5762 const ARMSubtarget *ST) {
5763 EVT VT = N->getValueType(0);
5764 SDLoc dl(N);
5765
5766 // We can get here for a node like i32 = ISD::SHL i32, i64
5767 if (VT != MVT::i64)
5768 return SDValue();
5769
5770 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
5771 N->getOpcode() == ISD::SHL) &&
5772 "Unknown shift to lower!");
5773
5774 unsigned ShOpc = N->getOpcode();
5775 if (ST->hasMVEIntegerOps()) {
5776 SDValue ShAmt = N->getOperand(1);
5777 unsigned ShPartsOpc = ARMISD::LSLL;
5778 ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
5779
5780 // If the shift amount is greater than 32 then do the default optimisation
5781 if (Con && Con->getZExtValue() > 32)
5782 return SDValue();
5783
5784 // Extract the lower 32 bits of the shift amount if it's an i64
5785 if (ShAmt->getValueType(0) == MVT::i64)
5786 ShAmt = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ShAmt,
5787 DAG.getConstant(0, dl, MVT::i32));
5788
5789 if (ShOpc == ISD::SRL) {
5790 if (!Con)
5791 // There is no t2LSRLr instruction so negate and perform an lsll if the
5792 // shift amount is in a register, emulating a right shift.
5793 ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
5794 DAG.getConstant(0, dl, MVT::i32), ShAmt);
5795 else
5796 // Else generate an lsrl on the immediate shift amount
5797 ShPartsOpc = ARMISD::LSRL;
5798 } else if (ShOpc == ISD::SRA)
5799 ShPartsOpc = ARMISD::ASRL;
5800
5801 // Lower 32 bits of the destination/source
5802 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5803 DAG.getConstant(0, dl, MVT::i32));
5804 // Upper 32 bits of the destination/source
5805 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5806 DAG.getConstant(1, dl, MVT::i32));
5807
5808 // Generate the shift operation as computed above
5809 Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
5810 ShAmt);
5811 // The upper 32 bits come from the second return value of lsll
5812 Hi = SDValue(Lo.getNode(), 1);
5813 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5814 }
5815
5816 // We only lower SRA, SRL of 1 here, all others use generic lowering.
5817 if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
5818 return SDValue();
5819
5820 // If we are in thumb mode, we don't have RRX.
5821 if (ST->isThumb1Only())
5822 return SDValue();
5823
5824 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
5825 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5826 DAG.getConstant(0, dl, MVT::i32));
5827 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5828 DAG.getConstant(1, dl, MVT::i32));
5829
5830 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
5831 // captures the result into a carry flag.
5832 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
5833 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
5834
5835 // The low part is an ARMISD::RRX operand, which shifts the carry in.
5836 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
5837
5838 // Merge the pieces into a single i64 value.
5839 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5840}
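// Worked example of the RRX path above for i64 x = (Hi,Lo) and "x >>u 1":
//   Hi' = SRL_FLAG(Hi)   ; Hi >> 1, the old bit 0 is captured in the carry
//   Lo' = RRX(Lo)        ; (carry << 31) | (Lo >> 1)
// which produces the 64-bit result in two 32-bit operations.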
5841
5842 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
5843 SDValue TmpOp0, TmpOp1;
5844 bool Invert = false;
5845 bool Swap = false;
5846 unsigned Opc = 0;
5847
5848 SDValue Op0 = Op.getOperand(0);
5849 SDValue Op1 = Op.getOperand(1);
5850 SDValue CC = Op.getOperand(2);
5851 EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
5852 EVT VT = Op.getValueType();
5853 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
5854 SDLoc dl(Op);
5855
5856 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
5857 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
5858 // Special-case integer 64-bit equality comparisons. They aren't legal,
5859 // but they can be lowered with a few vector instructions.
5860 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
5861 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
5862 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
5863 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
5864 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
5865 DAG.getCondCode(ISD::SETEQ));
5866 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
5867 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
5868 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
5869 if (SetCCOpcode == ISD::SETNE)
5870 Merged = DAG.getNOT(dl, Merged, CmpVT);
5871 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
5872 return Merged;
5873 }
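// e.g. "v2i64 setcc eq a, b" is lowered by the block above to, roughly:
//   t1: v4i32 = setcc eq (bitcast a), (bitcast b)   ; VCEQ.I32
//   t2: v4i32 = ARMISD::VREV64 t1                   ; swap 32-bit halves
//   t3: v4i32 = and t1, t2
// so each i64 lane is all-ones only if both of its 32-bit halves matched.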
5874
5875 if (CmpVT.getVectorElementType() == MVT::i64)
5876 // 64-bit comparisons are not legal in general.
5877 return SDValue();
5878
5879 if (Op1.getValueType().isFloatingPoint()) {
5880 switch (SetCCOpcode) {
5881 default: llvm_unreachable("Illegal FP comparison");
5882 case ISD::SETUNE:
5883 case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
5884 case ISD::SETOEQ:
5885 case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5886 case ISD::SETOLT:
5887 case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
5888 case ISD::SETOGT:
5889 case ISD::SETGT: Opc = ARMISD::VCGT; break;
5890 case ISD::SETOLE:
5891 case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
5892 case ISD::SETOGE:
5893 case ISD::SETGE: Opc = ARMISD::VCGE; break;
5894 case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
5895 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
5896 case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH;
5897 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
5898 case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH;
5899 case ISD::SETONE:
5900 // Expand this to (OLT | OGT).
5901 TmpOp0 = Op0;
5902 TmpOp1 = Op1;
5903 Opc = ISD::OR;
5904 Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5905 Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
5906 break;
5907 case ISD::SETUO:
5908 Invert = true;
5909 LLVM_FALLTHROUGH;
5910 case ISD::SETO:
5911 // Expand this to (OLT | OGE).
5912 TmpOp0 = Op0;
5913 TmpOp1 = Op1;
5914 Opc = ISD::OR;
5915 Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5916 Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
5917 break;
5918 }
5919 } else {
5920 // Integer comparisons.
5921 switch (SetCCOpcode) {
5922 default: llvm_unreachable("Illegal integer comparison");
5923 case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
5924 case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5925 case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
5926 case ISD::SETGT: Opc = ARMISD::VCGT; break;
5927 case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
5928 case ISD::SETGE: Opc = ARMISD::VCGE; break;
5929 case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
5930 case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
5931 case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
5932 case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
5933 }
5934
5935 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
5936 if (Opc == ARMISD::VCEQ) {
5937 SDValue AndOp;
5938 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5939 AndOp = Op0;
5940 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
5941 AndOp = Op1;
5942
5943 // Ignore bitconvert.
5944 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
5945 AndOp = AndOp.getOperand(0);
5946
5947 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
5948 Opc = ARMISD::VTST;
5949 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
5950 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
5951 Invert = !Invert;
5952 }
5953 }
5954 }
5955
5956 if (Swap)
5957 std::swap(Op0, Op1);
5958
5959 // If one of the operands is a constant vector zero, attempt to fold the
5960 // comparison to a specialized compare-against-zero form.
5961 SDValue SingleOp;
5962 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5963 SingleOp = Op0;
5964 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
5965 if (Opc == ARMISD::VCGE)
5966 Opc = ARMISD::VCLEZ;
5967 else if (Opc == ARMISD::VCGT)
5968 Opc = ARMISD::VCLTZ;
5969 SingleOp = Op1;
5970 }
5971
5972 SDValue Result;
5973 if (SingleOp.getNode()) {
5974 switch (Opc) {
5975 case ARMISD::VCEQ:
5976 Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
5977 case ARMISD::VCGE:
5978 Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
5979 case ARMISD::VCLEZ:
5980 Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
5981 case ARMISD::VCGT:
5982 Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
5983 case ARMISD::VCLTZ:
5984 Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
5985 default:
5986 Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5987 }
5988 } else {
5989 Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5990 }
5991
5992 Result = DAG.getSExtOrTrunc(Result, dl, VT);
5993
5994 if (Invert)
5995 Result = DAG.getNOT(dl, Result, VT);
5996
5997 return Result;
5998}
5999
6000 static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
6001 SDValue LHS = Op.getOperand(0);
6002 SDValue RHS = Op.getOperand(1);
6003 SDValue Carry = Op.getOperand(2);
6004 SDValue Cond = Op.getOperand(3);
6005 SDLoc DL(Op);
6006
6007 assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
6008
6009 // ARMISD::SUBE expects a carry, not a borrow like ISD::SUBCARRY, so we
6010 // have to invert the carry first.
6011 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
6012 DAG.getConstant(1, DL, MVT::i32), Carry);
6013 // This converts the boolean value carry into the carry flag.
6014 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
6015
6016 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
6017 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
6018
6019 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
6020 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
6021 SDValue ARMcc = DAG.getConstant(
6022 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
6023 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6024 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
6025 Cmp.getValue(1), SDValue());
6026 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
6027 CCR, Chain.getValue(1));
6028}
6029
6030/// isNEONModifiedImm - Check if the specified splat value corresponds to a
6031/// valid vector constant for a NEON or MVE instruction with a "modified immediate"
6032/// operand (e.g., VMOV). If so, return the encoded value.
6033static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
6034 unsigned SplatBitSize, SelectionDAG &DAG,
6035 const SDLoc &dl, EVT &VT, bool is128Bits,
6036 NEONModImmType type) {
6037 unsigned OpCmode, Imm;
6038
6039 // SplatBitSize is set to the smallest size that splats the vector, so a
6040 // zero vector will always have SplatBitSize == 8. However, NEON modified
6041 // immediate instructions other than VMOV do not support the 8-bit encoding
6042 // of a zero vector, and the default encoding of zero is supposed to be the
6043 // 32-bit version.
6044 if (SplatBits == 0)
6045 SplatBitSize = 32;
6046
6047 switch (SplatBitSize) {
6048 case 8:
6049 if (type != VMOVModImm)
6050 return SDValue();
6051 // Any 1-byte value is OK. Op=0, Cmode=1110.
6052 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
6053 OpCmode = 0xe;
6054 Imm = SplatBits;
6055 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
6056 break;
6057
6058 case 16:
6059 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
6060 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
6061 if ((SplatBits & ~0xff) == 0) {
6062 // Value = 0x00nn: Op=x, Cmode=100x.
6063 OpCmode = 0x8;
6064 Imm = SplatBits;
6065 break;
6066 }
6067 if ((SplatBits & ~0xff00) == 0) {
6068 // Value = 0xnn00: Op=x, Cmode=101x.
6069 OpCmode = 0xa;
6070 Imm = SplatBits >> 8;
6071 break;
6072 }
6073 return SDValue();
6074
6075 case 32:
6076 // NEON's 32-bit VMOV supports splat values where:
6077 // * only one byte is nonzero, or
6078 // * the least significant byte is 0xff and the second byte is nonzero, or
6079 // * the least significant 2 bytes are 0xff and the third is nonzero.
6080 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
6081 if ((SplatBits & ~0xff) == 0) {
6082 // Value = 0x000000nn: Op=x, Cmode=000x.
6083 OpCmode = 0;
6084 Imm = SplatBits;
6085 break;
6086 }
6087 if ((SplatBits & ~0xff00) == 0) {
6088 // Value = 0x0000nn00: Op=x, Cmode=001x.
6089 OpCmode = 0x2;
6090 Imm = SplatBits >> 8;
6091 break;
6092 }
6093 if ((SplatBits & ~0xff0000) == 0) {
6094 // Value = 0x00nn0000: Op=x, Cmode=010x.
6095 OpCmode = 0x4;
6096 Imm = SplatBits >> 16;
6097 break;
6098 }
6099 if ((SplatBits & ~0xff000000) == 0) {
6100 // Value = 0xnn000000: Op=x, Cmode=011x.
6101 OpCmode = 0x6;
6102 Imm = SplatBits >> 24;
6103 break;
6104 }
6105
6106 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
6107 if (type == OtherModImm) return SDValue();
6108
6109 if ((SplatBits & ~0xffff) == 0 &&
6110 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
6111 // Value = 0x0000nnff: Op=x, Cmode=1100.
6112 OpCmode = 0xc;
6113 Imm = SplatBits >> 8;
6114 break;
6115 }
6116
6117 // cmode == 0b1101 is not supported for MVE VMVN
6118 if (type == MVEVMVNModImm)
6119 return SDValue();
6120
6121 if ((SplatBits & ~0xffffff) == 0 &&
6122 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
6123 // Value = 0x00nnffff: Op=x, Cmode=1101.
6124 OpCmode = 0xd;
6125 Imm = SplatBits >> 16;
6126 break;
6127 }
6128
6129 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
6130 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
6131 // VMOV.I32. A (very) minor optimization would be to replicate the value
6132 // and fall through here to test for a valid 64-bit splat. But, then the
6133 // caller would also need to check and handle the change in size.
6134 return SDValue();
6135
6136 case 64: {
6137 if (type != VMOVModImm)
6138 return SDValue();
6139 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
6140 uint64_t BitMask = 0xff;
6141 uint64_t Val = 0;
6142 unsigned ImmMask = 1;
6143 Imm = 0;
6144 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
6145 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
6146 Val |= BitMask;
6147 Imm |= ImmMask;
6148 } else if ((SplatBits & BitMask) != 0) {
6149 return SDValue();
6150 }
6151 BitMask <<= 8;
6152 ImmMask <<= 1;
6153 }
6154
6155 if (DAG.getDataLayout().isBigEndian())
6156 // swap the higher and lower 32-bit words
6157 Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
6158
6159 // Op=1, Cmode=1110.
6160 OpCmode = 0x1e;
6161 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
6162 break;
6163 }
6164
6165 default:
6166 llvm_unreachable("unexpected size for isNEONModifiedImm");
6167 }
6168
6169 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
6170 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
6171}
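// For example, a 32-bit splat of 0x00004500 takes the "0x0000nn00" case
// above: OpCmode = 0x2 and Imm = 0x45, which createNEONModImm packs into
// the single immediate operand used by VMOV/VMVN/VORR/VBIC.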
6172
6173SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
6174 const ARMSubtarget *ST) const {
6175 EVT VT = Op.getValueType();
6176 bool IsDouble = (VT == MVT::f64);
6177 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
6178 const APFloat &FPVal = CFP->getValueAPF();
6179
6180 // Prevent floating-point constants from using literal loads
6181 // when execute-only is enabled.
6182 if (ST->genExecuteOnly()) {
6183 // If we can represent the constant as an immediate, don't lower it
6184 if (isFPImmLegal(FPVal, VT))
6185 return Op;
6186 // Otherwise, construct as integer, and move to float register
6187 APInt INTVal = FPVal.bitcastToAPInt();
6188 SDLoc DL(CFP);
6189 switch (VT.getSimpleVT().SimpleTy) {
6190 default:
6191 llvm_unreachable("Unknown floating point type!");
6192 break;
6193 case MVT::f64: {
6194 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
6195 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
6196 if (!ST->isLittle())
6197 std::swap(Lo, Hi);
6198 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
6199 }
6200 case MVT::f32:
6201 return DAG.getNode(ARMISD::VMOVSR, DL, VT,
6202 DAG.getConstant(INTVal, DL, MVT::i32));
6203 }
6204 }
6205
6206 if (!ST->hasVFP3Base())
6207 return SDValue();
6208
6209 // Use the default (constant pool) lowering for double constants when we have
6210 // an SP-only FPU
6211 if (IsDouble && !Subtarget->hasFP64())
6212 return SDValue();
6213
6214 // Try splatting with a VMOV.f32...
6215 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
6216
6217 if (ImmVal != -1) {
6218 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
6219 // We have code in place to select a valid ConstantFP already, no need to
6220 // do any mangling.
6221 return Op;
6222 }
6223
6224 // It's a float and we are trying to use NEON operations where
6225 // possible. Lower it to a splat followed by an extract.
6226 SDLoc DL(Op);
6227 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
6228 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
6229 NewVal);
6230 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
6231 DAG.getConstant(0, DL, MVT::i32));
6232 }
6233
6234 // The rest of our options are NEON only, make sure that's allowed before
6235 // proceeding..
6236 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
6237 return SDValue();
6238
6239 EVT VMovVT;
6240 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
6241
6242 // It wouldn't really be worth bothering for doubles except for one very
6243 // important value, which does happen to match: 0.0. So make sure we don't do
6244 // anything stupid.
6245 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
6246 return SDValue();
6247
6248 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
6249 SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
6250 VMovVT, false, VMOVModImm);
6251 if (NewVal != SDValue()) {
6252 SDLoc DL(Op);
6253 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
6254 NewVal);
6255 if (IsDouble)
6256 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
6257
6258 // It's a float: cast and extract a vector element.
6259 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
6260 VecConstant);
6261 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
6262 DAG.getConstant(0, DL, MVT::i32));
6263 }
6264
6265 // Finally, try a VMVN.i32
6266 NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
6267 false, VMVNModImm);
6268 if (NewVal != SDValue()) {
6269 SDLoc DL(Op);
6270 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
6271
6272 if (IsDouble)
6273 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
6274
6275 // It's a float: cast and extract a vector element.
6276 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
6277 VecConstant);
6278 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
6279 DAG.getConstant(0, DL, MVT::i32));
6280 }
6281
6282 return SDValue();
6283}
6284
6285 // Check whether a VEXT instruction can handle the shuffle mask when the
6286 // vector sources of the shuffle are the same.
6287static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
6288 unsigned NumElts = VT.getVectorNumElements();
6289
6290 // Assume that the first shuffle index is not UNDEF. Fail if it is.
6291 if (M[0] < 0)
6292 return false;
6293
6294 Imm = M[0];
6295
6296 // If this is a VEXT shuffle, the immediate value is the index of the first
6297 // element. The other shuffle indices must be the successive elements after
6298 // the first one.
6299 unsigned ExpectedElt = Imm;
6300 for (unsigned i = 1; i < NumElts; ++i) {
6301 // Increment the expected index. If it wraps around, just follow it
6302 // back to index zero and keep going.
6303 ++ExpectedElt;
6304 if (ExpectedElt == NumElts)
6305 ExpectedElt = 0;
6306
6307 if (M[i] < 0) continue; // ignore UNDEF indices
6308 if (ExpectedElt != static_cast<unsigned>(M[i]))
6309 return false;
6310 }
6311
6312 return true;
6313}
6314
6315static bool isVEXTMask(ArrayRef<int> M, EVT VT,
6316 bool &ReverseVEXT, unsigned &Imm) {
6317 unsigned NumElts = VT.getVectorNumElements();
6318 ReverseVEXT = false;
6319
6320 // Assume that the first shuffle index is not UNDEF. Fail if it is.
6321 if (M[0] < 0)
6322 return false;
6323
6324 Imm = M[0];
6325
6326 // If this is a VEXT shuffle, the immediate value is the index of the first
6327 // element. The other shuffle indices must be the successive elements after
6328 // the first one.
6329 unsigned ExpectedElt = Imm;
6330 for (unsigned i = 1; i < NumElts; ++i) {
6331 // Increment the expected index. If it wraps around, it may still be
6332 // a VEXT but the source vectors must be swapped.
6333 ExpectedElt += 1;
6334 if (ExpectedElt == NumElts * 2) {
6335 ExpectedElt = 0;
6336 ReverseVEXT = true;
6337 }
6338
6339 if (M[i] < 0) continue; // ignore UNDEF indices
6340 if (ExpectedElt != static_cast<unsigned>(M[i]))
6341 return false;
6342 }
6343
6344 // Adjust the index value if the source operands will be swapped.
6345 if (ReverseVEXT)
6346 Imm -= NumElts;
6347
6348 return true;
6349}
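// e.g. for v8i8 the mask <3,4,5,6,7,8,9,10> passes the check above with
// Imm = 3: VEXT concatenates the two sources and extracts eight consecutive
// bytes starting at byte 3. If the running index wraps past the end of the
// concatenation, the sources must be swapped, which ReverseVEXT records.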
6350
6351/// isVREVMask - Check if a vector shuffle corresponds to a VREV
6352/// instruction with the specified blocksize. (The order of the elements
6353/// within each block of the vector is reversed.)
6354static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
6355 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
6356 "Only possible block sizes for VREV are: 16, 32, 64");
6357
6358 unsigned EltSz = VT.getScalarSizeInBits();
6359 if (EltSz == 64)
6360 return false;
6361
6362 unsigned NumElts = VT.getVectorNumElements();
6363 unsigned BlockElts = M[0] + 1;
6364 // If the first shuffle index is UNDEF, be optimistic.
6365 if (M[0] < 0)
6366 BlockElts = BlockSize / EltSz;
6367
6368 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
6369 return false;
6370
6371 for (unsigned i = 0; i < NumElts; ++i) {
6372 if (M[i] < 0) continue; // ignore UNDEF indices
6373 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
6374 return false;
6375 }
6376
6377 return true;
6378}
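// e.g. for v8i16 with BlockSize == 64, BlockElts == 4 and the expected mask
// is <3,2,1,0,7,6,5,4>: every 64-bit block has its four 16-bit lanes
// reversed, which is exactly what VREV64.16 performs.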
6379
6380static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
6381 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
6382 // range, then 0 is placed into the resulting vector. So pretty much any mask
6383 // of 8 elements can work here.
6384 return VT == MVT::v8i8 && M.size() == 8;
6385}
6386
6387static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
6388 unsigned Index) {
6389 if (Mask.size() == Elements * 2)
6390 return Index / Elements;
6391 return Mask[Index] == 0 ? 0 : 1;
6392}
6393
6394// Checks whether the shuffle mask represents a vector transpose (VTRN) by
6395// checking that pairs of elements in the shuffle mask represent the same index
6396// in each vector, incrementing the expected index by 2 at each step.
6397// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
6398// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
6399// v2={e,f,g,h}
6400// WhichResult gives the offset for each element in the mask based on which
6401// of the two results it belongs to.
6402//
6403// The transpose can be represented either as:
6404// result1 = shufflevector v1, v2, result1_shuffle_mask
6405// result2 = shufflevector v1, v2, result2_shuffle_mask
6406// where v1/v2 and the shuffle masks have the same number of elements
6407// (here WhichResult (see below) indicates which result is being checked)
6408//
6409// or as:
6410// results = shufflevector v1, v2, shuffle_mask
6411// where both results are returned in one vector and the shuffle mask has twice
6412// as many elements as v1/v2 (here WhichResult will always be 0 if true) here we
6413// want to check the low half and high half of the shuffle mask as if it were
6414// the other case
6415static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6416 unsigned EltSz = VT.getScalarSizeInBits();
6417 if (EltSz == 64)
6418 return false;
6419
6420 unsigned NumElts = VT.getVectorNumElements();
6421 if (M.size() != NumElts && M.size() != NumElts*2)
6422 return false;
6423
6424 // If the mask is twice as long as the input vector then we need to check the
6425 // upper and lower parts of the mask with a matching value for WhichResult
6426 // FIXME: A mask with only even values will be rejected in case the first
6427 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
6428 // M[0] is used to determine WhichResult
6429 for (unsigned i = 0; i < M.size(); i += NumElts) {
6430 WhichResult = SelectPairHalf(NumElts, M, i);
6431 for (unsigned j = 0; j < NumElts; j += 2) {
6432 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
6433 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
6434 return false;
6435 }
6436 }
6437
6438 if (M.size() == NumElts*2)
6439 WhichResult = 0;
6440
6441 return true;
6442}
6443
6444/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
6445/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
6446/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
6447static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
6448 unsigned EltSz = VT.getScalarSizeInBits();
6449 if (EltSz == 64)
6450 return false;
6451
6452 unsigned NumElts = VT.getVectorNumElements();
6453 if (M.size() != NumElts && M.size() != NumElts*2)
6454 return false;
6455
6456 for (unsigned i = 0; i < M.size(); i += NumElts) {
6457 WhichResult = SelectPairHalf(NumElts, M, i);
6458 for (unsigned j = 0; j < NumElts; j += 2) {
6459 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
6460 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
6461 return false;
6462 }
6463 }
6464
6465 if (M.size() == NumElts*2)
6466 WhichResult = 0;
6467
6468 return true;
6469}
6470
6471// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
6472// that the mask elements are either all even and in steps of size 2 or all odd
6473// and in steps of size 2.
6474// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
6475// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
6476// v2={e,f,g,h}
6477// Requires similar checks to that of isVTRNMask with
6478 // respect to how the results are returned.
6479static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6480 unsigned EltSz = VT.getScalarSizeInBits();
6481 if (EltSz == 64)
6482 return false;
6483
6484 unsigned NumElts = VT.getVectorNumElements();
6485 if (M.size() != NumElts && M.size() != NumElts*2)
6486 return false;
6487
6488 for (unsigned i = 0; i < M.size(); i += NumElts) {
6489 WhichResult = SelectPairHalf(NumElts, M, i);
6490 for (unsigned j = 0; j < NumElts; ++j) {
6491 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
6492 return false;
6493 }
6494 }
6495
6496 if (M.size() == NumElts*2)
6497 WhichResult = 0;
6498
6499 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6500 if (VT.is64BitVector() && EltSz == 32)
6501 return false;
6502
6503 return true;
6504}
6505
6506/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
6507/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
6508 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
6509static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
6510 unsigned EltSz = VT.getScalarSizeInBits();
6511 if (EltSz == 64)
6512 return false;
6513
6514 unsigned NumElts = VT.getVectorNumElements();
6515 if (M.size() != NumElts && M.size() != NumElts*2)
6516 return false;
6517
6518 unsigned Half = NumElts / 2;
6519 for (unsigned i = 0; i < M.size(); i += NumElts) {
6520 WhichResult = SelectPairHalf(NumElts, M, i);
6521 for (unsigned j = 0; j < NumElts; j += Half) {
6522 unsigned Idx = WhichResult;
6523 for (unsigned k = 0; k < Half; ++k) {
6524 int MIdx = M[i + j + k];
6525 if (MIdx >= 0 && (unsigned) MIdx != Idx)
6526 return false;
6527 Idx += 2;
6528 }
6529 }
6530 }
6531
6532 if (M.size() == NumElts*2)
6533 WhichResult = 0;
6534
6535 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6536 if (VT.is64BitVector() && EltSz == 32)
6537 return false;
6538
6539 return true;
6540}
6541
6542// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
6543// that pairs of elements of the shufflemask represent the same index in each
6544// vector incrementing sequentially through the vectors.
6545// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
6546// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
6547// v2={e,f,g,h}
6548 // Requires similar checks to that of isVTRNMask with respect to how results
6549// are returned.
6550static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6551 unsigned EltSz = VT.getScalarSizeInBits();
6552 if (EltSz == 64)
6553 return false;
6554
6555 unsigned NumElts = VT.getVectorNumElements();
6556 if (M.size() != NumElts && M.size() != NumElts*2)
6557 return false;
6558
6559 for (unsigned i = 0; i < M.size(); i += NumElts) {
6560 WhichResult = SelectPairHalf(NumElts, M, i);
6561 unsigned Idx = WhichResult * NumElts / 2;
6562 for (unsigned j = 0; j < NumElts; j += 2) {
6563 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
6564 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
6565 return false;
6566 Idx += 1;
6567 }
6568 }
6569
6570 if (M.size() == NumElts*2)
6571 WhichResult = 0;
6572
6573 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6574 if (VT.is64BitVector() && EltSz == 32)
6575 return false;
6576
6577 return true;
6578}
6579
6580/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
6581/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
6582/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
6583static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
6584 unsigned EltSz = VT.getScalarSizeInBits();
6585 if (EltSz == 64)
6586 return false;
6587
6588 unsigned NumElts = VT.getVectorNumElements();
6589 if (M.size() != NumElts && M.size() != NumElts*2)
6590 return false;
6591
6592 for (unsigned i = 0; i < M.size(); i += NumElts) {
6593 WhichResult = SelectPairHalf(NumElts, M, i);
6594 unsigned Idx = WhichResult * NumElts / 2;
6595 for (unsigned j = 0; j < NumElts; j += 2) {
6596 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
6597 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
6598 return false;
6599 Idx += 1;
6600 }
6601 }
6602
6603 if (M.size() == NumElts*2)
6604 WhichResult = 0;
6605
6606 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6607 if (VT.is64BitVector() && EltSz == 32)
6608 return false;
6609
6610 return true;
6611}
6612
6613/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
6614/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
6615static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
6616 unsigned &WhichResult,
6617 bool &isV_UNDEF) {
6618 isV_UNDEF = false;
6619 if (isVTRNMask(ShuffleMask, VT, WhichResult))
6620 return ARMISD::VTRN;
6621 if (isVUZPMask(ShuffleMask, VT, WhichResult))
6622 return ARMISD::VUZP;
6623 if (isVZIPMask(ShuffleMask, VT, WhichResult))
6624 return ARMISD::VZIP;
6625
6626 isV_UNDEF = true;
6627 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
6628 return ARMISD::VTRN;
6629 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
6630 return ARMISD::VUZP;
6631 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
6632 return ARMISD::VZIP;
6633
6634 return 0;
6635}
6636
6637 /// \return true if this is a reverse operation on a vector.
6638static bool isReverseMask(ArrayRef<int> M, EVT VT) {
6639 unsigned NumElts = VT.getVectorNumElements();
6640 // Make sure the mask has the right size.
6641 if (NumElts != M.size())
6642 return false;
6643
6644 // Look for <15, ..., 3, -1, 1, 0>.
6645 for (unsigned i = 0; i != NumElts; ++i)
6646 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
6647 return false;
6648
6649 return true;
6650}
6651
6652// If N is an integer constant that can be moved into a register in one
6653// instruction, return an SDValue of such a constant (will become a MOV
6654// instruction). Otherwise return null.
6655 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
6656 const ARMSubtarget *ST, const SDLoc &dl) {
6657 uint64_t Val;
6658 if (!isa<ConstantSDNode>(N))
6659 return SDValue();
6660 Val = cast<ConstantSDNode>(N)->getZExtValue();
6661
6662 if (ST->isThumb1Only()) {
6663 if (Val <= 255 || ~Val <= 255)
6664 return DAG.getConstant(Val, dl, MVT::i32);
6665 } else {
6666 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
6667 return DAG.getConstant(Val, dl, MVT::i32);
6668 }
6669 return SDValue();
6670}
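// Editorial sketch (not from the source) of what the checks above accept:
// in Thumb1, MOV can encode immediates 0..255, and constants whose bitwise
// NOT fits in 8 bits are also cheap (via MVN). In ARM mode, getSOImmVal
// succeeds for any 8-bit value rotated right by an even amount, so e.g.
// 0xFF, 0xFF0, and 0xFF000000 are single-instruction constants, while
// 0x101 is not.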
6671
6672// If this is a case we can't handle, return null and let the default
6673// expansion code take care of it.
6674SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
6675 const ARMSubtarget *ST) const {
6676 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
6677 SDLoc dl(Op);
6678 EVT VT = Op.getValueType();
6679
6680 APInt SplatBits, SplatUndef;
6681 unsigned SplatBitSize;
6682 bool HasAnyUndefs;
6683 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
6684 if (SplatUndef.isAllOnesValue())
6685 return DAG.getUNDEF(VT);
6686
6687 if ((ST->hasNEON() && SplatBitSize <= 64) ||
6688 (ST->hasMVEIntegerOps() && SplatBitSize <= 32)) {
6689 // Check if an immediate VMOV works.
6690 EVT VmovVT;
6691 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
6692 SplatUndef.getZExtValue(), SplatBitSize,
6693 DAG, dl, VmovVT, VT.is128BitVector(),
6694 VMOVModImm);
6695
6696 if (Val.getNode()) {
6697 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
6698 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6699 }
6700
6701 // Try an immediate VMVN.
6702 uint64_t NegatedImm = (~SplatBits).getZExtValue();
6703 Val = isNEONModifiedImm(
6704 NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
6705 DAG, dl, VmovVT, VT.is128BitVector(),
6706 ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
6707 if (Val.getNode()) {
6708 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
6709 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6710 }
6711
6712 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
6713 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
6714 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
6715 if (ImmVal != -1) {
6716 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
6717 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
6718 }
6719 }
6720 }
6721 }
6722
6723 // Scan through the operands to see if only one value is used.
6724 //
6725 // As an optimisation, even if more than one value is used it may be more
6726 // profitable to splat with one value then change some lanes.
6727 //
6728 // Heuristically we decide to do this if the vector has a "dominant" value,
6729 // defined as splatted to more than half of the lanes.
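// For example, <a, a, a, b> has the dominant value a (3 of 4 lanes), so it
// can be lowered as a VDUP of a followed by a single lane insert of b.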
6730 unsigned NumElts = VT.getVectorNumElements();
6731 bool isOnlyLowElement = true;
6732 bool usesOnlyOneValue = true;
6733 bool hasDominantValue = false;
6734 bool isConstant = true;
6735
6736 // Map of the number of times a particular SDValue appears in the
6737 // element list.
6738 DenseMap<SDValue, unsigned> ValueCounts;
6739 SDValue Value;
6740 for (unsigned i = 0; i < NumElts; ++i) {
6741 SDValue V = Op.getOperand(i);
6742 if (V.isUndef())
6743 continue;
6744 if (i > 0)
6745 isOnlyLowElement = false;
6746 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
6747 isConstant = false;
6748
6749 ValueCounts.insert(std::make_pair(V, 0));
6750 unsigned &Count = ValueCounts[V];
6751
6752 // Is this value dominant? (takes up more than half of the lanes)
6753 if (++Count > (NumElts / 2)) {
6754 hasDominantValue = true;
6755 Value = V;
6756 }
6757 }
6758 if (ValueCounts.size() != 1)
6759 usesOnlyOneValue = false;
6760 if (!Value.getNode() && !ValueCounts.empty())
6761 Value = ValueCounts.begin()->first;
6762
6763 if (ValueCounts.empty())
6764 return DAG.getUNDEF(VT);
6765
6766 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
6767 // Keep going if we hit this case.
6768 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
6769 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
6770
6771 unsigned EltSize = VT.getScalarSizeInBits();
6772
6773 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
6774 // i32 and try again.
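// Illustrative example (not from the source): a constant splat of 1.0f is
// retried as a splat of the i32 bit pattern 0x3f800000, which the integer
// BUILD_VECTOR lowering may then materialize more cheaply.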
6775 if (hasDominantValue && EltSize <= 32) {
6776 if (!isConstant) {
6777 SDValue N;
6778
6779 // If we are VDUPing a value that comes directly from a vector, that will
6780 // cause an unnecessary move to and from a GPR, where instead we could
6781 // just use VDUPLANE. We can only do this if the lane being extracted
6782 // is at a constant index, as the VDUP from lane instructions only have
6783 // constant-index forms.
6784 ConstantSDNode *constIndex;
6785 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6786 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
6787 // We need to create a new undef vector to use for the VDUPLANE if the
6788 // size of the vector from which we get the value is different than the
6789 // size of the vector that we need to create. We will insert the element
6790 // such that the register coalescer will remove unnecessary copies.
6791 if (VT != Value->getOperand(0).getValueType()) {
6792 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
6793 VT.getVectorNumElements();
6794 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6795 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
6796 Value, DAG.getConstant(index, dl, MVT::i32)),
6797 DAG.getConstant(index, dl, MVT::i32));
6798 } else
6799 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6800 Value->getOperand(0), Value->getOperand(1));
6801 } else
6802 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
6803
6804 if (!usesOnlyOneValue) {
6805 // The dominant value was splatted as 'N', but we now have to insert
6806 // all differing elements.
6807 for (unsigned I = 0; I < NumElts; ++I) {
6808 if (Op.getOperand(I) == Value)
6809 continue;
6810 SmallVector<SDValue, 3> Ops;
6811 Ops.push_back(N);
6812 Ops.push_back(Op.getOperand(I));
6813 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
6814 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
6815 }
6816 }
6817 return N;
6818 }
6819 if (VT.getVectorElementType().isFloatingPoint()) {
6820 SmallVector<SDValue, 8> Ops;
6821 MVT FVT = VT.getVectorElementType().getSimpleVT();
6822 assert(FVT == MVT::f32 || FVT == MVT::f16);
6823 MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
6824 for (unsigned i = 0; i < NumElts; ++i)
6825 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,
6826 Op.getOperand(i)));
6827 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);
6828 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
6829 Val = LowerBUILD_VECTOR(Val, DAG, ST);
6830 if (Val.getNode())
6831 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6832 }
6833 if (usesOnlyOneValue) {
6834 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
6835 if (isConstant && Val.getNode())
6836 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
6837 }
6838 }
6839
6840 // If all elements are constants and the case above didn't get hit, fall back
6841 // to the default expansion, which will generate a load from the constant
6842 // pool.
6843 if (isConstant)
6844 return SDValue();
6845
6846 // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
6847 if (NumElts >= 4) {
6848 SDValue shuffle = ReconstructShuffle(Op, DAG);
6849 if (shuffle != SDValue())
6850 return shuffle;
6851 }
6852
6853 if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
6854 // If we haven't found an efficient lowering, try splitting a 128-bit vector
6855 // into two 64-bit vectors; we might discover a better way to lower it.
6856 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
6857 EVT ExtVT = VT.getVectorElementType();
6858 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
6859 SDValue Lower =
6860 DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2));
6861 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
6862 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
6863 SDValue Upper = DAG.getBuildVector(
6864 HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2));
6865 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
6866 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
6867 if (Lower && Upper)
6868 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
6869 }
6870
6871 // Vectors with 32- or 64-bit elements can be built by directly assigning
6872 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
6873 // will be legalized.
6874 if (EltSize >= 32) {
6875 // Do the expansion with floating-point types, since that is what the VFP
6876 // registers are defined to use, and since i64 is not legal.
6877 EVT EltVT = EVT::getFloatingPointVT(EltSize);
6878 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
6879 SmallVector<SDValue, 8> Ops;
6880 for (unsigned i = 0; i < NumElts; ++i)
6881 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
6882 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
6883 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6884 }
6885
6886 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
6887 // know the default expansion would otherwise fall back on something even
6888 // worse. For a vector with one or two non-undef values, that's
6889 // scalar_to_vector for the elements followed by a shuffle (provided the
6890 // shuffle is valid for the target) and materialization element by element
6891 // on the stack followed by a load for everything else.
6892 if (!isConstant && !usesOnlyOneValue) {
6893 SDValue Vec = DAG.getUNDEF(VT);
6894 for (unsigned i = 0 ; i < NumElts; ++i) {
6895 SDValue V = Op.getOperand(i);
6896 if (V.isUndef())
6897 continue;
6898 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
6899 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
6900 }
6901 return Vec;
6902 }
6903
6904 return SDValue();
6905}
6906
6907// Gather data to see if the operation can be modelled as a
6908// shuffle in combination with VEXTs.
6909SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
6910 SelectionDAG &DAG) const {
6911 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
6912 SDLoc dl(Op);
6913 EVT VT = Op.getValueType();
6914 unsigned NumElts = VT.getVectorNumElements();
6915
6916 struct ShuffleSourceInfo {
6917 SDValue Vec;
6918 unsigned MinElt = std::numeric_limits<unsigned>::max();
6919 unsigned MaxElt = 0;
6920
6921 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
6922 // be compatible with the shuffle we intend to construct. As a result
6923 // ShuffleVec will be some sliding window into the original Vec.
6924 SDValue ShuffleVec;
6925
6926 // Code should guarantee that element i in Vec starts at element
6927 // "WindowBase + i * WindowScale" in ShuffleVec.
6928 int WindowBase = 0;
6929 int WindowScale = 1;
6930
6931 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
6932
6933 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
6934 };
6935
6936 // First gather all vectors used as an immediate source for this BUILD_VECTOR
6937 // node.
6938 SmallVector<ShuffleSourceInfo, 2> Sources;
6939 for (unsigned i = 0; i < NumElts; ++i) {
6940 SDValue V = Op.getOperand(i);
6941 if (V.isUndef())
6942 continue;
6943 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
6944 // A shuffle can only come from building a vector from various
6945 // elements of other vectors.
6946 return SDValue();
6947 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
6948 // Furthermore, shuffles require a constant mask, whereas extractelts
6949 // accept variable indices.
6950 return SDValue();
6951 }
6952
6953 // Add this element source to the list if it's not already there.
6954 SDValue SourceVec = V.getOperand(0);
6955 auto Source = llvm::find(Sources, SourceVec);
6956 if (Source == Sources.end())
6957 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
6958
6959 // Update the minimum and maximum lane number seen.
6960 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
6961 Source->MinElt = std::min(Source->MinElt, EltNo);
6962 Source->MaxElt = std::max(Source->MaxElt, EltNo);
6963 }
6964
6965 // Currently only do something sane when at most two source vectors
6966 // are involved.
6967 if (Sources.size() > 2)
6968 return SDValue();
6969
6970 // Find out the smallest element size among result and two sources, and use
6971 // it as element size to build the shuffle_vector.
6972 EVT SmallestEltTy = VT.getVectorElementType();
6973 for (auto &Source : Sources) {
6974 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
6975 if (SrcEltTy.bitsLT(SmallestEltTy))
6976 SmallestEltTy = SrcEltTy;
6977 }
6978 unsigned ResMultiplier =
6979 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
6980 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
6981 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
6982
6983 // If the source vector is too wide or too narrow, we may nevertheless be able
6984 // to construct a compatible shuffle either by concatenating it with UNDEF or
6985 // extracting a suitable range of elements.
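// Illustrative example (not from the source): a v8i8 source feeding a
// 16-byte shuffle is padded with UNDEF via CONCAT_VECTORS below, while a
// 32-byte source is narrowed with EXTRACT_SUBVECTOR, or with a VEXT when
// the used lanes straddle the two halves.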
6986 for (auto &Src : Sources) {
6987 EVT SrcVT = Src.ShuffleVec.getValueType();
6988
6989 if (SrcVT.getSizeInBits() == VT.getSizeInBits())
6990 continue;
6991
6992 // This stage of the search produces a source with the same element type as
6993 // the original, but with a total width matching the BUILD_VECTOR output.
6994 EVT EltVT = SrcVT.getVectorElementType();
6995 unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
6996 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
6997
6998 if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
6999 if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
7000 return SDValue();
7001 // We can pad out the smaller vector for free, so if it's part of a
7002 // shuffle...
7003 Src.ShuffleVec =
7004 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
7005 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
7006 continue;
7007 }
7008
7009 if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
7010 return SDValue();
7011
7012 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
7013 // Span too large for a VEXT to cope
7014 return SDValue();
7015 }
7016
7017 if (Src.MinElt >= NumSrcElts) {
7018 // The extraction can just take the second half
7019 Src.ShuffleVec =
7020 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
7021 DAG.getConstant(NumSrcElts, dl, MVT::i32));
7022 Src.WindowBase = -NumSrcElts;
7023 } else if (Src.MaxElt < NumSrcElts) {
7024 // The extraction can just take the first half
7025 Src.ShuffleVec =
7026 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
7027 DAG.getConstant(0, dl, MVT::i32));
7028 } else {
7029 // An actual VEXT is needed
7030 SDValue VEXTSrc1 =
7031 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
7032 DAG.getConstant(0, dl, MVT::i32));
7033 SDValue VEXTSrc2 =
7034 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
7035 DAG.getConstant(NumSrcElts, dl, MVT::i32));
7036
7037 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
7038 VEXTSrc2,
7039 DAG.getConstant(Src.MinElt, dl, MVT::i32));
7040 Src.WindowBase = -Src.MinElt;
7041 }
7042 }
7043
7044 // Another possible incompatibility occurs from the vector element types. We
7045 // can fix this by bitcasting the source vectors to the same type we intend
7046 // for the shuffle.
7047 for (auto &Src : Sources) {
7048 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
7049 if (SrcEltTy == SmallestEltTy)
7050 continue;
7051 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
7052 Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
7053 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
7054 Src.WindowBase *= Src.WindowScale;
7055 }
7056
7057 // Final sanity check before we try to actually produce a shuffle.
7058 LLVM_DEBUG(for (auto Src
7059 : Sources)
7060 assert(Src.ShuffleVec.getValueType() == ShuffleVT););
7061
7062 // The stars all align, our next step is to produce the mask for the shuffle.
7063 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
7064 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
7065 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
7066 SDValue Entry = Op.getOperand(i);
7067 if (Entry.isUndef())
7068 continue;
7069
7070 auto Src = llvm::find(Sources, Entry.getOperand(0));
7071 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
7072
7073 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
7074 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
7075 // segment.
7076 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
7077 int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
7078 VT.getScalarSizeInBits());
7079 int LanesDefined = BitsDefined / BitsPerShuffleLane;
7080
7081 // This source is expected to fill ResMultiplier lanes of the final shuffle,
7082 // starting at the appropriate offset.
7083 int *LaneMask = &Mask[i * ResMultiplier];
7084
7085 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
7086 ExtractBase += NumElts * (Src - Sources.begin());
7087 for (int j = 0; j < LanesDefined; ++j)
7088 LaneMask[j] = ExtractBase + j;
7089 }
7090
7091 // Final check before we try to produce nonsense...
7092 if (!isShuffleMaskLegal(Mask, ShuffleVT))
7093 return SDValue();
7094
7095 // We can't handle more than two sources. This should have already
7096 // been checked before this point.
7097 assert(Sources.size() <= 2 && "Too many sources!");
7098
7099 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
7100 for (unsigned i = 0; i < Sources.size(); ++i)
7101 ShuffleOps[i] = Sources[i].ShuffleVec;
7102
7103 SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
7104 ShuffleOps[1], Mask);
7105 return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
7106}
7107
7109 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
7118 OP_VUZPL, // VUZP, left result
7119 OP_VUZPR, // VUZP, right result
7120 OP_VZIPL, // VZIP, left result
7121 OP_VZIPR, // VZIP, right result
7122 OP_VTRNL, // VTRN, left result
7123 OP_VTRNR // VTRN, right result
7125
7126static bool isLegalMVEShuffleOp(unsigned PFEntry) {
7127 unsigned OpNum = (PFEntry >> 26) & 0x0F;
7128 switch (OpNum) {
7129 case OP_COPY:
7130 case OP_VREV:
7131 case OP_VDUP0:
7132 case OP_VDUP1:
7133 case OP_VDUP2:
7134 case OP_VDUP3:
7135 return true;
7136 }
7137 return false;
7138}
7139
7140/// isShuffleMaskLegal - Targets can use this to indicate that they only
7141/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
7142/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
7143/// are assumed to be legal.
7144 bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
7145 if (VT.getVectorNumElements() == 4 &&
7146 (VT.is128BitVector() || VT.is64BitVector())) {
7147 unsigned PFIndexes[4];
7148 for (unsigned i = 0; i != 4; ++i) {
7149 if (M[i] < 0)
7150 PFIndexes[i] = 8;
7151 else
7152 PFIndexes[i] = M[i];
7153 }
7154
7155 // Compute the index in the perfect shuffle table.
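// Each mask entry is a base-9 digit: lane indices 0-7, with 8 standing for
// an undef lane, so the four digits below address a table of 9^4 entries.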
7156 unsigned PFTableIndex =
7157 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
7158 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
7159 unsigned Cost = (PFEntry >> 30);
7160
7161 if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
7162 return true;
7163 }
7164
7165 bool ReverseVEXT, isV_UNDEF;
7166 unsigned Imm, WhichResult;
7167
7168 unsigned EltSize = VT.getScalarSizeInBits();
7169 if (EltSize >= 32 ||
7170 ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
7171 isVREVMask(M, VT, 64) ||
7172 isVREVMask(M, VT, 32) ||
7173 isVREVMask(M, VT, 16))
7174 return true;
7175 else if (Subtarget->hasNEON() &&
7176 (isVEXTMask(M, VT, ReverseVEXT, Imm) ||
7177 isVTBLMask(M, VT) ||
7178 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
7179 return true;
7180 else if (Subtarget->hasNEON() && (VT == MVT::v8i16 || VT == MVT::v16i8) &&
7181 isReverseMask(M, VT))
7182 return true;
7183 else
7184 return false;
7185}
7186
7187/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
7188/// the specified operations to build the shuffle.
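/// A note on the encoding, as decoded below: each 32-bit PFEntry packs the
/// cost in bits [31:30], the opcode in bits [29:26], and the 13-bit LHS and
/// RHS table indices in bits [25:13] and [12:0] respectively.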
7189static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
7190 SDValue RHS, SelectionDAG &DAG,
7191 const SDLoc &dl) {
7192 unsigned OpNum = (PFEntry >> 26) & 0x0F;
7193 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
7194 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
7195
7196 if (OpNum == OP_COPY) {
7197 if (LHSID == (1*9+2)*9+3) return LHS;
7198 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
7199 return RHS;
7200 }
7201
7202 SDValue OpLHS, OpRHS;
7203 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
7204 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
7205 EVT VT = OpLHS.getValueType();
7206
7207 switch (OpNum) {
7208 default: llvm_unreachable("Unknown shuffle opcode!");
7209 case OP_VREV:
7210 // VREV divides the vector in half and swaps within the half.
7211 if (VT.getVectorElementType() == MVT::i32 ||
7212 VT.getVectorElementType() == MVT::f32)
7213 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
7214 // vrev <4 x i16> -> VREV32
7215 if (VT.getVectorElementType() == MVT::i16)
7216 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
7217 // vrev <4 x i8> -> VREV16
7218 assert(VT.getVectorElementType() == MVT::i8);
7219 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
7220 case OP_VDUP0:
7221 case OP_VDUP1:
7222 case OP_VDUP2:
7223 case OP_VDUP3:
7224 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
7225 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
7226 case OP_VEXT1:
7227 case OP_VEXT2:
7228 case OP_VEXT3:
7229 return DAG.getNode(ARMISD::VEXT, dl, VT,
7230 OpLHS, OpRHS,
7231 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
7232 case OP_VUZPL:
7233 case OP_VUZPR:
7234 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
7235 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
7236 case OP_VZIPL:
7237 case OP_VZIPR:
7238 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
7239 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
7240 case OP_VTRNL:
7241 case OP_VTRNR:
7242 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
7243 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
7244 }
7245}
7246
7247 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
7248 ArrayRef<int> ShuffleMask,
7249 SelectionDAG &DAG) {
7250 // Check to see if we can use the VTBL instruction.
7251 SDValue V1 = Op.getOperand(0);
7252 SDValue V2 = Op.getOperand(1);
7253 SDLoc DL(Op);
7254
7255 SmallVector<SDValue, 8> VTBLMask;
7256 for (ArrayRef<int>::iterator
7257 I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
7258 VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
7259
7260 if (V2.getNode()->isUndef())
7261 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
7262 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
7263
7264 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
7265 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
7266}
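// A note on VTBL semantics: each byte of the mask operand selects a byte of
// the table register(s), and out-of-range indices yield zero bytes, so e.g.
// a mask of <7,6,...,0> (illustrative) reverses the bytes of one d-register.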
7267
7268 static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
7269 SelectionDAG &DAG) {
7270 SDLoc DL(Op);
7271 SDValue OpLHS = Op.getOperand(0);
7272 EVT VT = OpLHS.getValueType();
7273
7274 assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
7275 "Expect an v8i16/v16i8 type");
7276 OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
7277 // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now,
7278 // extract the first 8 bytes into the top double word and the last 8 bytes
7279 // into the bottom double word. The v8i16 case is similar.
7280 unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
7281 return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
7282 DAG.getConstant(ExtractNum, DL, MVT::i32));
7283}
7284
7285 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
7286 const ARMSubtarget *ST) {
7287 SDValue V1 = Op.getOperand(0);
7288 SDValue V2 = Op.getOperand(1);
7289 SDLoc dl(Op);
7290 EVT VT = Op.getValueType();
7291 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
7292
7293 // Convert shuffles that are directly supported on NEON to target-specific
7294 // DAG nodes, instead of keeping them as shuffles and matching them again
7295 // during code selection. This is more efficient and avoids the possibility
7296 // of inconsistencies between legalization and selection.
7297 // FIXME: floating-point vectors should be canonicalized to integer vectors
7298 // of the same size so that they get CSEd properly.
7299 ArrayRef<int> ShuffleMask = SVN->getMask();
7300
7301 unsigned EltSize = VT.getScalarSizeInBits();
7302 if (EltSize <= 32) {
7303 if (SVN->isSplat()) {
7304 int Lane = SVN->getSplatIndex();
7305 // If this is an undef splat, generate it via "just" vdup, if possible.
7306 if (Lane == -1) Lane = 0;
7307
7308 // Test if V1 is a SCALAR_TO_VECTOR.
7309 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
7310 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
7311 }
7312 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
7313 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
7314 // reaches it).
7315 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
7316 !isa<ConstantSDNode>(V1.getOperand(0))) {
7317 bool IsScalarToVector = true;
7318 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
7319 if (!V1.getOperand(i).isUndef()) {
7320 IsScalarToVector = false;
7321 break;
7322 }
7323 if (IsScalarToVector)
7324 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
7325 }
7326 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
7327 DAG.getConstant(Lane, dl, MVT::i32));
7328 }
7329
7330 bool ReverseVEXT = false;
7331 unsigned Imm = 0;
7332 if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
7333 if (ReverseVEXT)
7334 std::swap(V1, V2);
7335 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
7336 DAG.getConstant(Imm, dl, MVT::i32));
7337 }
7338
7339 if (isVREVMask(ShuffleMask, VT, 64))
7340 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
7341 if (isVREVMask(ShuffleMask, VT, 32))
7342 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
7343 if (isVREVMask(ShuffleMask, VT, 16))
7344 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
7345
7346 if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
7347 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
7348 DAG.getConstant(Imm, dl, MVT::i32));
7349 }
7350
7351 // Check for Neon shuffles that modify both input vectors in place.
7352 // If both results are used, i.e., if there are two shuffles with the same
7353 // source operands and with masks corresponding to both results of one of
7354 // these operations, DAG memoization will ensure that a single node is
7355 // used for both shuffles.
7356 unsigned WhichResult = 0;
7357 bool isV_UNDEF = false;
7358 if (ST->hasNEON()) {
7359 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
7360 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
7361 if (isV_UNDEF)
7362 V2 = V1;
7363 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
7364 .getValue(WhichResult);
7365 }
7366 }
7367
7368 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
7369 // shuffles that produce a result larger than their operands with:
7370 // shuffle(concat(v1, undef), concat(v2, undef))
7371 // ->
7372 // shuffle(concat(v1, v2), undef)
7373 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
7374 //
7375 // This is useful in the general case, but there are special cases where
7376 // native shuffles produce larger results: the two-result ops.
7377 //
7378 // Look through the concat when lowering them:
7379 // shuffle(concat(v1, v2), undef)
7380 // ->
7381 // concat(VZIP(v1, v2):0, :1)
7382 //
7383 if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
7384 SDValue SubV1 = V1->getOperand(0);
7385 SDValue SubV2 = V1->getOperand(1);
7386 EVT SubVT = SubV1.getValueType();
7387
7388 // We expect these to have been canonicalized to -1.
7389 assert(llvm::all_of(ShuffleMask, [&](int i) {
7390 return i < (int)VT.getVectorNumElements();
7391 }) && "Unexpected shuffle index into UNDEF operand!");
7392
7393 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
7394 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
7395 if (isV_UNDEF)
7396 SubV2 = SubV1;
7397 assert((WhichResult == 0) &&
7398 "In-place shuffle of concat can only have one result!");
7399 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
7400 SubV1, SubV2);
7401 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
7402 Res.getValue(1));
7403 }
7404 }
7405 }
7406
7407 // If the shuffle is not directly supported and it has 4 elements, use
7408 // the PerfectShuffle-generated table to synthesize it from other shuffles.
7409 unsigned NumElts = VT.getVectorNumElements();
7410 if (NumElts == 4) {
7411 unsigned PFIndexes[4];
7412 for (unsigned i = 0; i != 4; ++i) {
7413 if (ShuffleMask[i] < 0)
7414 PFIndexes[i] = 8;
7415 else
7416 PFIndexes[i] = ShuffleMask[i];
7417 }
7418
7419 // Compute the index in the perfect shuffle table.
7420 unsigned PFTableIndex =
7421 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
7422 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
7423 unsigned Cost = (PFEntry >> 30);
7424
7425 if (Cost <= 4) {
7426 if (ST->hasNEON())
7427 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
7428 else if (isLegalMVEShuffleOp(PFEntry)) {
7429 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
7430 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
7431 unsigned PFEntryLHS = PerfectShuffleTable[LHSID];
7432 unsigned PFEntryRHS = PerfectShuffleTable[RHSID];
7433 if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))
7434 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
7435 }
7436 }
7437 }
7438
7439 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
7440 if (EltSize >= 32) {
7441 // Do the expansion with floating-point types, since that is what the VFP
7442 // registers are defined to use, and since i64 is not legal.
7443 EVT EltVT = EVT::getFloatingPointVT(EltSize);
7444 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
7445 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
7446 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
7447 SmallVector<SDValue, 8> Ops;
7448 for (unsigned i = 0; i < NumElts; ++i) {
7449 if (ShuffleMask[i] < 0)
7450 Ops.push_back(DAG.getUNDEF(EltVT));
7451 else
7452 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
7453 ShuffleMask[i] < (int)NumElts ? V1 : V2,
7454 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
7455 dl, MVT::i32)));
7456 }
7457 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
7458 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
7459 }
7460
7461 if (ST->hasNEON() && (VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
7462 return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
7463 
7464 if (ST->hasNEON() && VT == MVT::v8i8)
7465 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
7466 return NewOp;
7467
7468 return SDValue();
7469}
7470
7471SDValue ARMTargetLowering::
7472LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
7473 // INSERT_VECTOR_ELT is legal only for immediate indexes.
7474 SDValue Lane = Op.getOperand(2);
7475 if (!isa<ConstantSDNode>(Lane))
7476 return SDValue();
7477
7478 SDValue Elt = Op.getOperand(1);
7479 EVT EltVT = Elt.getValueType();
7480 if (getTypeAction(*DAG.getContext(), EltVT) ==
7481 TargetLowering::TypePromoteInteger) {
7482 // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
7483 // but the type system will try to do that if we don't intervene.
7484 // Reinterpret any such vector-element insertion as one with the
7485 // corresponding integer types.
7486
7487 SDLoc dl(Op);
7488
7489 EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
7490 assert(getTypeAction(*DAG.getContext(), IEltVT) !=
7491 TargetLowering::TypePromoteInteger);
7492 
7493 SDValue VecIn = Op.getOperand(0);
7494 EVT VecVT = VecIn.getValueType();
7495 EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,
7496 VecVT.getVectorNumElements());
7497
7498 SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);
7499 SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);
7500 SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,
7501 IVecIn, IElt, Lane);
7502 return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);
7503 }
7504
7505 return Op;
7506}
7507
7508 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
7509 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
7510 SDValue Lane = Op.getOperand(1);
7511 if (!isa<ConstantSDNode>(Lane))
7512 return SDValue();
7513
7514 SDValue Vec = Op.getOperand(0);
7515 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
7516 SDLoc dl(Op);
7517 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
7518 }
7519
7520 return Op;
7521}
7522
7523 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
7524 // The only time a CONCAT_VECTORS operation can have legal types is when
7525 // two 64-bit vectors are concatenated to a 128-bit vector.
7526 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
7527 "unexpected CONCAT_VECTORS");
7528 SDLoc dl(Op);
7529 SDValue Val = DAG.getUNDEF(MVT::v2f64);
7530 SDValue Op0 = Op.getOperand(0);
7531 SDValue Op1 = Op.getOperand(1);
7532 if (!Op0.isUndef())
7533 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
7534 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
7535 DAG.getIntPtrConstant(0, dl));
7536 if (!Op1.isUndef())
7537 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
7538 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
7539 DAG.getIntPtrConstant(1, dl));
7540 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
7541}
7542
7543/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
7544/// element has been zero/sign-extended, depending on the isSigned parameter,
7545/// from an integer type half its size.
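/// Illustrative example (not from the source): the v4i32 BUILD_VECTOR
/// <100, -100, 7, -7> counts as sign-extended from i16 since every constant
/// fits in 16 signed bits, whereas <100, 70000, 7, 9> is not zero-extended
/// from i16 because 70000 needs more than 16 unsigned bits.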
7546 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
7547 bool isSigned) {
7548 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
7549 EVT VT = N->getValueType(0);
7550 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
7551 SDNode *BVN = N->getOperand(0).getNode();
7552 if (BVN->getValueType(0) != MVT::v4i32 ||
7553 BVN->getOpcode() != ISD::BUILD_VECTOR)
7554 return false;
7555 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
7556 unsigned HiElt = 1 - LoElt;
7557 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
7558 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
7559 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
7560 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
7561 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
7562 return false;
7563 if (isSigned) {
7564 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
7565 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
7566 return true;
7567 } else {
7568 if (Hi0->isNullValue() && Hi1->isNullValue())
7569 return true;
7570 }
7571 return false;
7572 }
7573
7574 if (N->getOpcode() != ISD::BUILD_VECTOR)
7575 return false;
7576
7577 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
7578 SDNode *Elt = N->getOperand(i).getNode();
7579 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
7580 unsigned EltSize = VT.getScalarSizeInBits();
7581 unsigned HalfSize = EltSize / 2;
7582 if (isSigned) {
7583 if (!isIntN(HalfSize, C->getSExtValue()))
7584 return false;
7585 } else {
7586 if (!isUIntN(HalfSize, C->getZExtValue()))
7587 return false;
7588 }
7589 continue;
7590 }
7591 return false;
7592 }
7593
7594 return true;
7595}
7596
7597/// isSignExtended - Check if a node is a vector value that is sign-extended
7598/// or a constant BUILD_VECTOR with sign-extended elements.
7599 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
7600 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
7601 return true;
7602 if (isExtendedBUILD_VECTOR(N, DAG, true))
7603 return true;
7604 return false;
7605}
7606
7607/// isZeroExtended - Check if a node is a vector value that is zero-extended
7608/// or a constant BUILD_VECTOR with zero-extended elements.
7609 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
7610 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
7611 return true;
7612 if (isExtendedBUILD_VECTOR(N, DAG, false))
7613 return true;
7614 return false;
7615}
7616
7617static EVT getExtensionTo64Bits(const EVT &OrigVT) {
7618 if (OrigVT.getSizeInBits() >= 64)
7619 return OrigVT;
7620
7621 assert(OrigVT.isSimple() && "Expecting a simple value type");
7622
7623 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
7624 switch (OrigSimpleTy) {
7625 default: llvm_unreachable("Unexpected Vector Type");
7626 case MVT::v2i8:
7627 case MVT::v2i16:
7628 return MVT::v2i32;
7629 case MVT::v4i8:
7630 return MVT::v4i16;
7631 }
7632}
7633
7634/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
7635/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
7636/// We insert the required extension here to get the vector to fill a D register.
7637 static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
7638 const EVT &OrigTy,
7639 const EVT &ExtTy,
7640 unsigned ExtOpcode) {
7641 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
7642 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
7643 // 64-bits we need to insert a new extension so that it will be 64-bits.
7644 assert(ExtTy.is128BitVector() && "Unexpected extension size");
7645 if (OrigTy.getSizeInBits() >= 64)
7646 return N;
7647
7648 // Must extend size to at least 64 bits to be used as an operand for VMULL.
7649 EVT NewVT = getExtensionTo64Bits(OrigTy);
7650
7651 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
7652}
7653
7654/// SkipLoadExtensionForVMULL - return a load of the original vector size that
7655/// does not do any sign/zero extension. If the original vector is less
7656/// than 64 bits, an appropriate extension will be added after the load to
7657/// reach a total size of 64 bits. We have to add the extension separately
7658/// because ARM does not have a sign/zero extending load for vectors.
7659 static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG) {
7660 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
7661
7662 // The load already has the right type.
7663 if (ExtendedTy == LD->getMemoryVT())
7664 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
7665 LD->getBasePtr(), LD->getPointerInfo(),
7666 LD->getAlignment(), LD->getMemOperand()->getFlags());
7667
7668 // We need to create a zextload/sextload. We cannot just create a load
7669 // followed by a zext/sext node because LowerMUL is also run during normal
7670 // operation legalization where we can't create illegal types.
7671 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
7672 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
7673 LD->getMemoryVT(), LD->getAlignment(),
7674 LD->getMemOperand()->getFlags());
7675}
7676
7677/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
7678/// extending load, or BUILD_VECTOR with extended elements, return the
7679/// unextended value. The unextended vector should be 64 bits so that it can
7680/// be used as an operand to a VMULL instruction. If the original vector size
7681 /// before extension is less than 64 bits we add an extension to resize
7682/// the vector to 64 bits.
7683 static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
7684 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
7685 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
7686 N->getOperand(0)->getValueType(0),
7687 N->getValueType(0),
7688 N->getOpcode());
7689
7690 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
7691 assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
7692 "Expected extending load");
7693
7694 SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
7695 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
7696 unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
7697 SDValue extLoad =
7698 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
7699 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
7700
7701 return newLoad;
7702 }
7703
7704 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
7705 // have been legalized as a BITCAST from v4i32.
7706 if (N->getOpcode() == ISD::BITCAST) {
7707 SDNode *BVN = N->getOperand(0).getNode();
7708 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
7709 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
7710 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
7711 return DAG.getBuildVector(
7712 MVT::v2i32, SDLoc(N),
7713 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
7714 }
7715 // Construct a new BUILD_VECTOR with elements truncated to half the size.
7716 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
7717 EVT VT = N->getValueType(0);
7718 unsigned EltSize = VT.getScalarSizeInBits() / 2;
7719 unsigned NumElts = VT.getVectorNumElements();
7720 MVT TruncVT = MVT::getIntegerVT(EltSize);
7721 SmallVector<SDValue, 8> Ops;
7722 SDLoc dl(N);
7723 for (unsigned i = 0; i != NumElts; ++i) {
7724 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
7725 const APInt &CInt = C->getAPIntValue();
7726 // Element types smaller than 32 bits are not legal, so use i32 elements.
7727 // The values are implicitly truncated so sext vs. zext doesn't matter.
7728 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
7729 }
7730 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
7731}
7732
7733static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
7734 unsigned Opcode = N->getOpcode();
7735 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
7736 SDNode *N0 = N->getOperand(0).getNode();
7737 SDNode *N1 = N->getOperand(1).getNode();
7738 return N0->hasOneUse() && N1->hasOneUse() &&
7739 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
7740 }
7741 return false;
7742}
7743
7744static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
7745 unsigned Opcode = N->getOpcode();
7746 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
7747 SDNode *N0 = N->getOperand(0).getNode();
7748 SDNode *N1 = N->getOperand(1).getNode();
7749 return N0->hasOneUse() && N1->hasOneUse() &&
7750 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
7751 }
7752 return false;
7753}
7754
7755 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
7756 // Multiplications are only custom-lowered for 128-bit vectors so that
7757 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
7758 EVT VT = Op.getValueType();
7759 assert(VT.is128BitVector() && VT.isInteger() &&
7760 "unexpected type for custom-lowering ISD::MUL");
7761 SDNode *N0 = Op.getOperand(0).getNode();
7762 SDNode *N1 = Op.getOperand(1).getNode();
7763 unsigned NewOpc = 0;
7764 bool isMLA = false;
7765 bool isN0SExt = isSignExtended(N0, DAG);
7766 bool isN1SExt = isSignExtended(N1, DAG);
7767 if (isN0SExt && isN1SExt)
7768 NewOpc = ARMISD::VMULLs;
7769 else {
7770 bool isN0ZExt = isZeroExtended(N0, DAG);
7771 bool isN1ZExt = isZeroExtended(N1, DAG);
7772 if (isN0ZExt && isN1ZExt)
7773 NewOpc = ARMISD::VMULLu;
7774 else if (isN1SExt || isN1ZExt) {
7775 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
7776 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
7777 if (isN1SExt && isAddSubSExt(N0, DAG)) {
7778 NewOpc = ARMISD::VMULLs;
7779 isMLA = true;
7780 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
7781 NewOpc = ARMISD::VMULLu;
7782 isMLA = true;
7783 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
7784 std::swap(N0, N1);
7785 NewOpc = ARMISD::VMULLu;
7786 isMLA = true;
7787 }
7788 }
7789
7790 if (!NewOpc) {
7791 if (VT == MVT::v2i64)
7792 // Fall through to expand this. It is not legal.
7793 return SDValue();
7794 else
7795 // Other vector multiplications are legal.
7796 return Op;
7797 }
7798 }
7799
7800 // Legalize to a VMULL instruction.
7801 SDLoc DL(Op);
7802 SDValue Op0;
7803 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
7804 if (!isMLA) {
7805 Op0 = SkipExtensionForVMULL(N0, DAG);
7806 assert(Op0.getValueType().is64BitVector() &&
7807 Op1.getValueType().is64BitVector() &&
7808 "unexpected types for extended operands to VMULL");
7809 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
7810 }
7811
7812 // Optimizing (zext A + zext B) * C to (VMULL A, C) + (VMULL B, C) during
7813 // isel lowering to take advantage of no-stall back to back vmul + vmla.
7814 // vmull q0, d4, d6
7815 // vmlal q0, d5, d6
7816 // is faster than
7817 // vaddl q0, d4, d5
7818 // vmovl q1, d6
7819 // vmul q0, q0, q1
7820 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
7821 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
7822 EVT Op1VT = Op1.getValueType();
7823 return DAG.getNode(N0->getOpcode(), DL, VT,
7824 DAG.getNode(NewOpc, DL, VT,
7825 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
7826 DAG.getNode(NewOpc, DL, VT,
7827 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
7828}
7829
7830 static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
7831 SelectionDAG &DAG) {
7832 // TODO: Should this propagate fast-math-flags?
7833
7834 // Convert to float
7835 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
7836 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
7837 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
7838 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
7839 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
7840 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
7841 // Get reciprocal estimate.
7842 // float4 recip = vrecpeq_f32(yf);
7843 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7844 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7845 Y);
7846 // Because char has a smaller range than uchar, we can actually get away
7847 // without any Newton steps. This requires that we use a weird bias
7848 // of 0xb000, however (again, this has been exhaustively tested).
7849 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
7850 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
7851 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
7852 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
7853 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
7854 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
7855 // Convert back to short.
7856 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
7857 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
7858 return X;
7859}
7860
7861 static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
7862 SelectionDAG &DAG) {
7863 // TODO: Should this propagate fast-math-flags?
7864
7865 SDValue N2;
7866 // Convert to float.
7867 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
7868 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
7869 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
7870 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
7871 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7872 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7873
7874 // Use reciprocal estimate and one refinement step.
7875 // float4 recip = vrecpeq_f32(yf);
7876 // recip *= vrecpsq_f32(yf, recip);
7877 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7878 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7879 N1);
7880 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7881 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7882 N1, N2);
7883 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7884 // Because short has a smaller range than ushort, we can actually get away
7885 // with only a single Newton step. This requires that we use a weird bias
7886 // of 0x89, however (again, this has been exhaustively tested).
7887 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
7888 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
7889 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
7890 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
7891 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
7892 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
7893 // Convert back to integer and return.
7894 // return vmovn_s32(vcvt_s32_f32(result));
7895 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
7896 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
7897 return N0;
7898}
7899
7900 static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
7901 EVT VT = Op.getValueType();
7902 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7903 "unexpected type for custom-lowering ISD::SDIV");
7904
7905 SDLoc dl(Op);
7906 SDValue N0 = Op.getOperand(0);
7907 SDValue N1 = Op.getOperand(1);
7908 SDValue N2, N3;
7909
7910 if (VT == MVT::v8i8) {
7911 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
7912 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
7913
7914 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7915 DAG.getIntPtrConstant(4, dl));
7916 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7917 DAG.getIntPtrConstant(4, dl));
7918 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7919 DAG.getIntPtrConstant(0, dl));
7920 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7921 DAG.getIntPtrConstant(0, dl));
7922
7923 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
7924 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
7925
7926 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7927 N0 = LowerCONCAT_VECTORS(N0, DAG);
7928
7929 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
7930 return N0;
7931 }
7932 return LowerSDIV_v4i16(N0, N1, dl, DAG);
7933}
7934
7935 static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
7936 // TODO: Should this propagate fast-math-flags?
7937 EVT VT = Op.getValueType();
7938 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7939 "unexpected type for custom-lowering ISD::UDIV");
7940
7941 SDLoc dl(Op);
7942 SDValue N0 = Op.getOperand(0);
7943 SDValue N1 = Op.getOperand(1);
7944 SDValue N2, N3;
7945
7946 if (VT == MVT::v8i8) {
7947 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
7948 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
7949
7950 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7951 DAG.getIntPtrConstant(4, dl));
7952 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7953 DAG.getIntPtrConstant(4, dl));
7954 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7955 DAG.getIntPtrConstant(0, dl));
7956 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7957 DAG.getIntPtrConstant(0, dl));
7958
7959 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
7960 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
7961
7962 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7963 N0 = LowerCONCAT_VECTORS(N0, DAG);
7964
7965 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
7966 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
7967 MVT::i32),
7968 N0);
7969 return N0;
7970 }
7971
7972 // v4i16 udiv ... Convert to float.
7973 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
7974 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
7975 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
7976 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
7977 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7978 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7979
7980 // Use reciprocal estimate and two refinement steps.
7981 // float4 recip = vrecpeq_f32(yf);
7982 // recip *= vrecpsq_f32(yf, recip);
7983 // recip *= vrecpsq_f32(yf, recip);
7984 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7985 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7986 BN1);
7987 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7988 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7989 BN1, N2);
7990 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7991 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7992 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7993 BN1, N2);
7994 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7995 // Simply multiplying by the reciprocal estimate can leave us a few ulps
7996 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
7997 // and that it will never cause us to return an answer too large).
7998 // float4 result = as_float4(as_int4(xf*recip) + 2);
7999 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
8000 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
8001 N1 = DAG.getConstant(2, dl, MVT::v4i32);
8002 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
8003 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
8004 // Convert back to integer and return.
8005 // return vmovn_u32(vcvt_s32_f32(result));
8006 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
8007 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
8008 return N0;
8009}
8010
8011 static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
8012 SDNode *N = Op.getNode();
8013 EVT VT = N->getValueType(0);
8014 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
8015
8016 SDValue Carry = Op.getOperand(2);
8017
8018 SDLoc DL(Op);
8019
8020 SDValue Result;
8021 if (Op.getOpcode() == ISD::ADDCARRY) {
8022 // This converts the boolean value carry into the carry flag.
8023 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
8024
8025 // Do the addition proper using the carry flag we wanted.
8026 Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
8027 Op.getOperand(1), Carry);
8028
8029 // Now convert the carry flag into a boolean value.
8030 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
8031 } else {
8032 // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we
8033 // have to invert the carry first.
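// (ARM's subtract-with-carry treats C == 1 as "no borrow", while
// ISD::SUBCARRY's incoming carry is 1 when a borrow is required, hence the
// 1 - C conversions here and after the subtraction below.)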
8034 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
8035 DAG.getConstant(1, DL, MVT::i32), Carry);
8036 // This converts the boolean value carry into the carry flag.
8037 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
8038
8039 // Do the subtraction proper using the carry flag we wanted.
8040 Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
8041 Op.getOperand(1), Carry);
8042
8043 // Now convert the carry flag into a boolean value.
8044 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
8045 // But the carry returned by ARMISD::SUBE is not a borrow as expected
8046 // by ISD::SUBCARRY, so compute 1 - C.
8047 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
8048 DAG.getConstant(1, DL, MVT::i32), Carry);
8049 }
8050
8051 // Return both values.
8052 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
8053}
8054
8055SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
8056 assert(Subtarget->isTargetDarwin());
8057
8058 // For iOS, we want to call an alternative entry point: __sincos_stret,
8059 // whose return values are passed via sret.
8060 SDLoc dl(Op);
8061 SDValue Arg = Op.getOperand(0);
8062 EVT ArgVT = Arg.getValueType();
8063 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
8064 auto PtrVT = getPointerTy(DAG.getDataLayout());
8065
8067 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8068
8069 // Pair of floats / doubles used to pass the result.
8070 Type *RetTy = StructType::get(ArgTy, ArgTy);
8071 auto &DL = DAG.getDataLayout();
8072
8073 ArgListTy Args;
8074 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
8075 SDValue SRet;
8076 if (ShouldUseSRet) {
8077 // Create stack object for sret.
8078 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
8079 const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
8080 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
8081 SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
8082
8083 ArgListEntry Entry;
8084 Entry.Node = SRet;
8085 Entry.Ty = RetTy->getPointerTo();
8086 Entry.IsSExt = false;
8087 Entry.IsZExt = false;
8088 Entry.IsSRet = true;
8089 Args.push_back(Entry);
8090 RetTy = Type::getVoidTy(*DAG.getContext());
8091 }
8092
8093 ArgListEntry Entry;
8094 Entry.Node = Arg;
8095 Entry.Ty = ArgTy;
8096 Entry.IsSExt = false;
8097 Entry.IsZExt = false;
8098 Args.push_back(Entry);
8099
8100 RTLIB::Libcall LC =
8101 (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
8102 const char *LibcallName = getLibcallName(LC);
8103 CallingConv::ID CC = getLibcallCallingConv(LC);
8104 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
8105
8106 TargetLowering::CallLoweringInfo CLI(DAG);
8107 CLI.setDebugLoc(dl)
8108 .setChain(DAG.getEntryNode())
8109 .setCallee(CC, RetTy, Callee, std::move(Args))
8110 .setDiscardResult(ShouldUseSRet);
8111 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
8112
8113 if (!ShouldUseSRet)
8114 return CallResult.first;
8115
8116 SDValue LoadSin =
8117 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
8118
8119 // Address of cos field.
8120 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
8121 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
8122 SDValue LoadCos =
8123 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
8124
8125 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
8126 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
8127 LoadSin.getValue(0), LoadCos.getValue(0));
8128}
8129
8130SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
8131 bool Signed,
8132 SDValue &Chain) const {
8133 EVT VT = Op.getValueType();
8134 assert((VT == MVT::i32 || VT == MVT::i64) &&
8135 "unexpected type for custom lowering DIV");
8136 SDLoc dl(Op);
8137
8138 const auto &DL = DAG.getDataLayout();
8139 const auto &TLI = DAG.getTargetLoweringInfo();
8140
8141 const char *Name = nullptr;
8142 if (Signed)
8143 Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
8144 else
8145 Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
8146
8147 SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
8148
8149 ARMTargetLowering::ArgListTy Args;
8150
8151 for (auto AI : {1, 0}) {
8152 ArgListEntry Arg;
8153 Arg.Node = Op.getOperand(AI);
8154 Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
8155 Args.push_back(Arg);
8156 }
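 // Note the {1, 0} order above: the divisor (operand 1) is pushed first,
 // matching the __rt_*div runtime convention of divisor before dividend.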
8157
8158 CallLoweringInfo CLI(DAG);
8159 CLI.setDebugLoc(dl)
8160 .setChain(Chain)
8161 .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
8162 ES, std::move(Args));
8163
8164 return LowerCallTo(CLI).first;
8165}
8166
8167// This is a code size optimisation: return the original SDIV node to
8168// DAGCombiner when we don't want to expand SDIV into a sequence of
8169// instructions, and an empty node otherwise which will cause the
8170// SDIV to be expanded in DAGCombine.
8171SDValue
8172ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
8173 SelectionDAG &DAG,
8174 SmallVectorImpl<SDNode *> &Created) const {
8175 // TODO: Support SREM
8176 if (N->getOpcode() != ISD::SDIV)
8177 return SDValue();
8178
8179 const auto &ST = static_cast<const ARMSubtarget&>(DAG.getSubtarget());
8180 const bool MinSize = ST.hasMinSize();
8181 const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
8182 : ST.hasDivideInARMMode();
8183
8184 // Don't touch vector types; rewriting this may lead to scalarizing
8185 // the int divs.
8186 if (N->getOperand(0).getValueType().isVector())
8187 return SDValue();
8188
8189 // Bail unless MinSize is set; on top of that, for both ARM and Thumb mode
8190 // we need hardware divide support for this to be really profitable.
8191 if (!(MinSize && HasDivide))
8192 return SDValue();
8193
8194 // ARM mode is a bit simpler than Thumb: we can handle large power
8195 // of 2 immediates with 1 mov instruction; no further checks required,
8196 // just return the sdiv node.
8197 if (!ST.isThumb())
8198 return SDValue(N, 0);
8199
8200 // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
8201 // and thus lose the code size benefits of a MOVS that requires only 2.
8202 // TargetTransformInfo's 'getIntImmCodeSizeCost' models exactly this cost,
8203 // but it is not worth the trouble of obtaining TTI here just for that.
8204 if (Divisor.sgt(128))
8205 return SDValue();
8206
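 // e.g. at minsize "sdiv i32 %x, 64" is kept: a 2-byte MOVS #64 plus SDIV is
 // smaller than the shift-based expansion, while a power of two above 128,
 // say 256, no longer fits a MOVS immediate and is rejected above.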
8207 return SDValue(N, 0);
8208}
8209
8210SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
8211 bool Signed) const {
8212 assert(Op.getValueType() == MVT::i32 &&
8213 "unexpected type for custom lowering DIV");
8214 SDLoc dl(Op);
8215
8216 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
8217 DAG.getEntryNode(), Op.getOperand(1));
8218
8219 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
8220}
8221
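// Wrap the denominator in a WIN__DBZCHK. For an i64 denominator the two
// halves are ORed together first, since (Lo | Hi) == 0 exactly when the
// full 64-bit value is zero, so a single 32-bit check suffices.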
8222static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
8223 SDLoc DL(N);
8224 SDValue Op = N->getOperand(1);
8225 if (N->getValueType(0) == MVT::i32)
8226 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
8227 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
8228 DAG.getConstant(0, DL, MVT::i32));
8229 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
8230 DAG.getConstant(1, DL, MVT::i32));
8231 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
8232 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
8233}
8234
8235void ARMTargetLowering::ExpandDIV_Windows(
8236 SDValue Op, SelectionDAG &DAG, bool Signed,
8237 SmallVectorImpl<SDValue> &Results) const {
8238 const auto &DL = DAG.getDataLayout();
8239 const auto &TLI = DAG.getTargetLoweringInfo();
8240
8241 assert(Op.getValueType() == MVT::i64 &&
8242 "unexpected type for custom lowering DIV");
8243 SDLoc dl(Op);
8244
8245 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
8246
8247 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
8248
8249 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
8250 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
8251 DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
8252 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
8253
8254 Results.push_back(Lower);
8255 Results.push_back(Upper);
8256}
8257
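// Monotonic atomic loads/stores lower to plain LDR/STR; anything stronger
// is rejected here (empty SDValue) so that the default expansion can
// surround the access with the barriers it needs.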
8258static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
8259 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
8260 // Acquire/Release load/store is not legal for targets without a dmb or
8261 // equivalent available.
8262 return SDValue();
8263
8264 // Monotonic load/store is legal for all targets.
8265 return Op;
8266}
8267
8268static void ReplaceREADCYCLECOUNTER(SDNode *N,
8269 SmallVectorImpl<SDValue> &Results,
8270 SelectionDAG &DAG,
8271 const ARMSubtarget *Subtarget) {
8272 SDLoc DL(N);
8273 // Under Power Management extensions, the cycle-count is:
8274 // mrc p15, #0, <Rt>, c9, c13, #0
8275 SDValue Ops[] = { N->getOperand(0), // Chain
8276 DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
8277 DAG.getConstant(15, DL, MVT::i32),
8278 DAG.getConstant(0, DL, MVT::i32),
8279 DAG.getConstant(9, DL, MVT::i32),
8280 DAG.getConstant(13, DL, MVT::i32),
8281 DAG.getConstant(0, DL, MVT::i32)
8282 };
8283
8284 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
8285 DAG.getVTList(MVT::i32, MVT::Other), Ops);
8286 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
8287 DAG.getConstant(0, DL, MVT::i32)));
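 // Only the low 32 bits of the cycle count are read here; pairing them with
 // a zero high word widens the MRC result to the i64 that readcyclecounter
 // returns.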
8288 Results.push_back(Cycles32.getValue(1));
8289}
8290
8291static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
8292 SDLoc dl(V.getNode());
8293 SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
8294 SDValue VHi = DAG.getAnyExtOrTrunc(
8295 DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
8296 dl, MVT::i32);
8297 bool isBigEndian = DAG.getDataLayout().isBigEndian();
8298 if (isBigEndian)
8299 std::swap (VLo, VHi);
8300 SDValue RegClass =
8301 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
8302 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
8303 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
8304 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
8305 return SDValue(
8306 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
8307}
8308
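// CMP_SWAP_64 operates on register pairs (LDREXD/STREXD require an even/odd
// GPR pair in ARM mode), so the i64 expected and new values are first glued
// into untyped pairs with createGPRPairNode above.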
8309static void ReplaceCMP_SWAP_64Results(SDNode *N,
8310 SmallVectorImpl<SDValue> &Results,
8311 SelectionDAG &DAG) {
8312 assert(N->getValueType(0) == MVT::i64 &&
8313 "AtomicCmpSwap on types less than 64 should be legal");
8314 SDValue Ops[] = {N->getOperand(1),
8315 createGPRPairNode(DAG, N->getOperand(2)),
8316 createGPRPairNode(DAG, N->getOperand(3)),
8317 N->getOperand(0)};
8318 SDNode *CmpSwap = DAG.getMachineNode(
8319 ARM::CMP_SWAP_64, SDLoc(N),
8320 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
8321
8322 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
8323 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
8324
8325 bool isBigEndian = DAG.getDataLayout().isBigEndian();
8326
8327 Results.push_back(
8328 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
8329 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
8330 Results.push_back(
8331 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
8332 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
8333 Results.push_back(SDValue(CmpSwap, 2));
8334}
8335
8336static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
8337 SelectionDAG &DAG) {
8338 const auto &TLI = DAG.getTargetLoweringInfo();
8339
8340 assert(Subtarget.getTargetTriple().isOSMSVCRT() &&
8341 "Custom lowering is MSVCRT specific!");
8342
8343 SDLoc dl(Op);
8344 SDValue Val = Op.getOperand(0);
8345 MVT Ty = Val->getSimpleValueType(0);
8346 SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1));
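 // MSVCRT has no powi, so the integer exponent is converted to floating
 // point and the ordinary pow/powf entry point is called instead, e.g.
 // llvm.powi.f64(x, 3) becomes pow(x, 3.0).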
8347 SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ? "powf" : "pow",
8348 TLI.getPointerTy(DAG.getDataLayout()));
8349
8350 TargetLowering::ArgListTy Args;
8351 TargetLowering::ArgListEntry Entry;
8352
8353 Entry.Node = Val;
8354 Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext());
8355 Entry.IsZExt = true;
8356 Args.push_back(Entry);
8357
8358 Entry.Node = Exponent;
8359 Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext());
8360 Entry.IsZExt = true;
8361 Args.push_back(Entry);
8362
8363 Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext());
8364
8365 // The in-chain to the call starts as the entry node. If we are emitting a
8366 // tail call, the chain will be mutated if the node has a non-entry input
8367 // chain.
8368 SDValue InChain = DAG.getEntryNode();
8369 SDValue TCChain = InChain;
8370
8371 const Function &F = DAG.getMachineFunction().getFunction();
8372 bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
8373 F.getReturnType() == LCRTy;
8374 if (IsTC)
8375 InChain = TCChain;
8376
8377 TargetLowering::CallLoweringInfo CLI(DAG);
8378 CLI.setDebugLoc(dl)
8379 .setChain(InChain)
8380 .setCallee(CallingConv::ARM_AAPCS_VFP, LCRTy, Callee, std::move(Args))
8381 .setTailCall(IsTC);
8382 std::pair<SDValue, SDValue> CI = TLI.LowerCallTo(CLI);
8383
8384 // Return the chain (the DAG root) if it is a tail call
8385 return !CI.second.getNode() ? DAG.getRoot() : CI.first;
8386}
8387
8388SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
8389 LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
8390 switch (Op.getOpcode()) {
8391 default: llvm_unreachable("Don't know how to custom lower this!");
8392 case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
8393 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
8394 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
8395 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
8396 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
8397 case ISD::SELECT: return LowerSELECT(Op, DAG);
8398 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
8399 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
8400 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
8401 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
8402 case ISD::VASTART: return LowerVASTART(Op, DAG);
8403 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
8404 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
8405 case ISD::SINT_TO_FP:
8406 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
8407 case ISD::FP_TO_SINT:
8408 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
8409 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
8410 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
8411 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
8412 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
8413 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
8414 case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
8415 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
8416 Subtarget);
8417 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
8418 case ISD::SHL:
8419 case ISD::SRL:
8420 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
8421 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
8422 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
8423 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
8424 case ISD::SRL_PARTS:
8425 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
8426 case ISD::CTTZ:
8427 case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
8428 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
8429 case ISD::SETCC: return LowerVSETCC(Op, DAG);
8430 case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
8431 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
8432 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
8433 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
8434 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
8435 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
8436 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
8437 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
8438 case ISD::MUL: return LowerMUL(Op, DAG);
8439 case ISD::SDIV:
8440 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
8441 return LowerDIV_Windows(Op, DAG, /* Signed */ true);
8442 return LowerSDIV(Op, DAG);
8443 case ISD::UDIV:
8444 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
8445 return LowerDIV_Windows(Op, DAG, /* Signed */ false);
8446 return LowerUDIV(Op, DAG);
8447 case ISD::ADDCARRY:
8448 case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
8449 case ISD::SADDO:
8450 case ISD::SSUBO:
8451 return LowerSignedALUO(Op, DAG);
8452 case ISD::UADDO:
8453 case ISD::USUBO:
8454 return LowerUnsignedALUO(Op, DAG);
8455 case ISD::ATOMIC_LOAD:
8456 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
8457 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
8458 case ISD::SDIVREM:
8459 case ISD::UDIVREM: return LowerDivRem(Op, DAG);
8460 case ISD::DYNAMIC_STACKALLOC:
8461 if (Subtarget->isTargetWindows())
8462 return LowerDYNAMIC_STACKALLOC(Op, DAG);
8463 llvm_unreachable("Don't know how to custom lower this!");
8464 case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
8465 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
8466 case ISD::FPOWI: return LowerFPOWI(Op, *Subtarget, DAG);
8467 case ARMISD::WIN__DBZCHK: return SDValue();
8468 }
8469}
8470
8471static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
8472 SelectionDAG &DAG) {
8473 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
8474 unsigned Opc = 0;
8475 if (IntNo == Intrinsic::arm_smlald)
8476 Opc = ARMISD::SMLALD;
8477 else if (IntNo == Intrinsic::arm_smlaldx)
8478 Opc = ARMISD::SMLALDX;
8479 else if (IntNo == Intrinsic::arm_smlsld)
8480 Opc = ARMISD::SMLSLD;
8481 else if (IntNo == Intrinsic::arm_smlsldx)
8482 Opc = ARMISD::SMLSLDX;
8483 else
8484 return;
8485
8486 SDLoc dl(N);
8487 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
8488 N->getOperand(3),
8489 DAG.getConstant(0, dl, MVT::i32));
8490 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
8491 N->getOperand(3),
8492 DAG.getConstant(1, dl, MVT::i32));
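 // The i64 accumulator is split because SMLALD/SMLSLD model the
 // instruction's two 32-bit accumulator halves; the pair of results pushed
 // below is recombined into an i64 by the type legalizer.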
8493
8494 SDValue LongMul = DAG.getNode(Opc, dl,
8495 DAG.getVTList(MVT::i32, MVT::i32),
8496 N->getOperand(1), N->getOperand(2),
8497 Lo, Hi);
8498 Results.push_back(LongMul.getValue(0));
8499 Results.push_back(LongMul.getValue(1));
8500}
8501
8502/// ReplaceNodeResults - Replace the results of node with an illegal result
8503/// type with new values built out of custom code.
8504void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
8505 SmallVectorImpl<SDValue> &Results,
8506 SelectionDAG &DAG) const {
8507 SDValue Res;
8508 switch (N->getOpcode()) {
8509 default:
8510 llvm_unreachable("Don't know how to custom expand this!");
8511 case ISD::READ_REGISTER:
8512 ExpandREAD_REGISTER(N, Results, DAG);
8513 break;
8514 case ISD::BITCAST:
8515 Res = ExpandBITCAST(N, DAG, Subtarget);
8516 break;
8517 case ISD::SRL:
8518 case ISD::SRA:
8519 case ISD::SHL:
8520 Res = Expand64BitShift(N, DAG, Subtarget);
8521 break;
8522 case ISD::SREM:
8523 case ISD::UREM:
8524 Res = LowerREM(N, DAG);
8525 break;
8526 case ISD::SDIVREM:
8527 case ISD::UDIVREM:
8528 Res = LowerDivRem(SDValue(N, 0), DAG);
8529 assert(Res.getNumOperands() == 2 && "DivRem needs two values");
8530 Results.push_back(Res.getValue(0));
8531 Results.push_back(Res.getValue(1));
8532 return;
8533 case ISD::READCYCLECOUNTER:
8534 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
8535 return;
8536 case ISD::UDIV:
8537 case ISD::SDIV:
8538 assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
8539 return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
8540 Results);
8541 case ISD::ATOMIC_CMP_SWAP:
8542 ReplaceCMP_SWAP_64Results(N, Results, DAG);
8543 return;
8544 case ISD::INTRINSIC_WO_CHAIN:
8545 return ReplaceLongIntrinsic(N, Results, DAG);
8546 case ISD::ABS:
8547 lowerABS(N, Results, DAG);
8548 return;
8549
8550 }
8551 if (Res.getNode())
8552 Results.push_back(Res);
8553}
8554
8555//===----------------------------------------------------------------------===//
8556// ARM Scheduler Hooks
8557//===----------------------------------------------------------------------===//
8558
8559/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
8560/// registers the function context.
8561void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
8562 MachineBasicBlock *MBB,
8563 MachineBasicBlock *DispatchBB,
8564 int FI) const {
8565 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
8566 "ROPI/RWPI not currently supported with SjLj");
8567 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
8568 DebugLoc dl = MI.getDebugLoc();
8569 MachineFunction *MF = MBB->getParent();
8570 MachineRegisterInfo *MRI = &MF->getRegInfo();
8571 MachineConstantPool *MCP = MF->getConstantPool();
8572 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
8573 const Function &F = MF->getFunction();
8574
8575 bool isThumb = Subtarget->isThumb();
8576 bool isThumb2 = Subtarget->isThumb2();
8577
8578 unsigned PCLabelId = AFI->createPICLabelUId();
8579 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
8580 ARMConstantPoolValue *CPV =
8581 ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
8582 unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
8583
8584 const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
8585 : &ARM::GPRRegClass;
8586
8587 // Grab constant pool and fixed stack memory operands.
8588 MachineMemOperand *CPMMO =
8589 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
8590 MachineMemOperand::MOLoad, 4, 4);
8591
8592 MachineMemOperand *FIMMOSt =
8593 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
8594 MachineMemOperand::MOStore, 4, 4);
8595
8596 // Load the address of the dispatch MBB into the jump buffer.
8597 if (isThumb2) {
8598 // Incoming value: jbuf
8599 // ldr.n r5, LCPI1_1
8600 // orr r5, r5, #1
8601 // add r5, pc
8602 // str r5, [$jbuf, #+4] ; &jbuf[1]
8603 unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
8604 BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
8605 .addConstantPoolIndex(CPI)
8606 .addMemOperand(CPMMO)
8607 .add(predOps(ARMCC::AL));
8608 // Set the low bit because of thumb mode.
8609 unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
8610 BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
8611 .addReg(NewVReg1, RegState::Kill)
8612 .addImm(0x01)
8613 .add(predOps(ARMCC::AL))
8614 .add(condCodeOp());
8615 unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
8616 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
8617 .addReg(NewVReg2, RegState::Kill)
8618 .addImm(PCLabelId);
8619 BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
8620 .addReg(NewVReg3, RegState::Kill)
8621 .addFrameIndex(FI)
8622 .addImm(36) // &jbuf[1] :: pc
8623 .addMemOperand(FIMMOSt)
8624 .add(predOps(ARMCC::AL));
8625 } else if (isThumb) {
8626 // Incoming value: jbuf
8627 // ldr.n r1, LCPI1_4
8628 // add r1, pc
8629 // mov r2, #1
8630 // orrs r1, r2
8631 // add r2, $jbuf, #+4 ; &jbuf[1]
8632 // str r1, [r2]
8633 unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
8634 BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
8635 .addConstantPoolIndex(CPI)
8636 .addMemOperand(CPMMO)
8637 .add(predOps(ARMCC::AL));
8638 unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
8639 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
8640 .addReg(NewVReg1, RegState::Kill)
8641 .addImm(PCLabelId);
8642 // Set the low bit because of thumb mode.
8643 unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
8644 BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
8645 .addReg(ARM::CPSR, RegState::Define)
8646 .addImm(1)
8647 .add(predOps(ARMCC::AL));
8648 unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
8649 BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
8650 .addReg(ARM::CPSR, RegState::Define)
8651 .addReg(NewVReg2, RegState::Kill)
8652 .addReg(NewVReg3, RegState::Kill)
8653 .add(predOps(ARMCC::AL));
8654 unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
8655 BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
8656 .addFrameIndex(FI)
8657 .addImm(36); // &jbuf[1] :: pc
8658 BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
8659 .addReg(NewVReg4, RegState::Kill)
8660 .addReg(NewVReg5, RegState::Kill)
8661 .addImm(0)
8662 .addMemOperand(FIMMOSt)
8663 .add(predOps(ARMCC::AL));
8664 } else {
8665 // Incoming value: jbuf
8666 // ldr r1, LCPI1_1
8667 // add r1, pc, r1
8668 // str r1, [$jbuf, #+4] ; &jbuf[1]
8669 unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
8670 BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
8671 .addConstantPoolIndex(CPI)
8672 .addImm(0)
8673 .addMemOperand(CPMMO)
8674 .add(predOps(ARMCC::AL));
8675 unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
8676 BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
8677 .addReg(NewVReg1, RegState::Kill)
8678 .addImm(PCLabelId)
8679 .add(predOps(ARMCC::AL));
8680 BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
8681 .addReg(NewVReg2, RegState::Kill)
8682 .addFrameIndex(FI)
8683 .addImm(36) // &jbuf[1] :: pc
8684 .addMemOperand(FIMMOSt)
8685 .add(predOps(ARMCC::AL));
8686 }
8687}
8688
8689void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
8690 MachineBasicBlock *MBB) const {
8691 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
8692 DebugLoc dl = MI.getDebugLoc();
8693 MachineFunction *MF = MBB->getParent();
8694 MachineRegisterInfo *MRI = &MF->getRegInfo();
8695 MachineFrameInfo &MFI = MF->getFrameInfo();
8696 int FI = MFI.getFunctionContextIndex();
8697
8698 const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
8699 : &ARM::GPRnopcRegClass;
8700
8701 // Get a mapping of the call site numbers to all of the landing pads they're
8702 // associated with.
8703 DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
8704 unsigned MaxCSNum = 0;
8705 for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
8706 ++BB) {
8707 if (!BB->isEHPad()) continue;
8708
8709 // FIXME: We should assert that the EH_LABEL is the first MI in the landing
8710 // pad.
8711 for (MachineBasicBlock::iterator
8712 II = BB->begin(), IE = BB->end(); II != IE; ++II) {
8713 if (!II->isEHLabel()) continue;
8714
8715 MCSymbol *Sym = II->getOperand(0).getMCSymbol();
8716 if (!MF->hasCallSiteLandingPad(Sym)) continue;
8717
8718 SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
8719 for (SmallVectorImpl<unsigned>::iterator
8720 CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
8721 CSI != CSE; ++CSI) {
8722 CallSiteNumToLPad[*CSI].push_back(&*BB);
8723 MaxCSNum = std::max(MaxCSNum, *CSI);
8724 }
8725 break;
8726 }
8727 }
8728
8729 // Get an ordered list of the machine basic blocks for the jump table.
8730 std::vector<MachineBasicBlock*> LPadList;
8731 SmallPtrSet<MachineBasicBlock*, 32> InvokeBBs;
8732 LPadList.reserve(CallSiteNumToLPad.size());
8733 for (unsigned I = 1; I <= MaxCSNum; ++I) {
8734 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
8735 for (SmallVectorImpl<MachineBasicBlock*>::iterator
8736 II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
8737 LPadList.push_back(*II);
8738 InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
8739 }
8740 }
8741
8742 assert(!LPadList.empty() &&
8743 "No landing pad destinations for the dispatch jump table!");
8744
8745 // Create the jump table and associated information.
8746 MachineJumpTableInfo *JTI =
8747 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
8748 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
8749
8750 // Create the MBBs for the dispatch code.
8751
8752 // Shove the dispatch's address into the return slot in the function context.
8753 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
8754 DispatchBB->setIsEHPad();
8755
8756 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
8757 unsigned trap_opcode;
8758 if (Subtarget->isThumb())
8759 trap_opcode = ARM::tTRAP;
8760 else
8761 trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
8762
8763 BuildMI(TrapBB, dl, TII->get(trap_opcode));
8764 DispatchBB->addSuccessor(TrapBB);
8765
8766 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
8767 DispatchBB->addSuccessor(DispContBB);
8768
8769 // Insert the MBBs.
8770 MF->insert(MF->end(), DispatchBB);
8771 MF->insert(MF->end(), DispContBB);
8772 MF->insert(MF->end(), TrapBB);
8773
8774 // Insert code into the entry block that creates and registers the function
8775 // context.
8776 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
8777
8778 MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
8779 MachinePointerInfo::getFixedStack(*MF, FI),
8780 MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4);
8781
8782 MachineInstrBuilder MIB;
8783 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
8784
8784
8785 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
8786 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
8787
8788 // Add a register mask with no preserved registers. This results in all
8789 // registers being marked as clobbered. This can't work if the dispatch block
8790 // is in a Thumb1 function and is linked with ARM code which uses the FP
8791 // registers, as there is no way to preserve the FP registers in Thumb1 mode.
8792 MIB.addRegMask(RI.getSjLjDispatchPreservedMask(*MF));
8793
8794 bool IsPositionIndependent = isPositionIndependent();
8795 unsigned NumLPads = LPadList.size();
8796 if (Subtarget->isThumb2()) {
8797 unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
8798 BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
8799 .addFrameIndex(FI)
8800 .addImm(4)
8801 .addMemOperand(FIMMOLd)
8802 .add(predOps(ARMCC::AL));
8803
8804 if (NumLPads < 256) {
8805 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
8806 .addReg(NewVReg1)
8807 .addImm(LPadList.size())
8808 .add(predOps(ARMCC::AL));
8809 } else {
8810 unsigned VReg1 = MRI->createVirtualRegister(TRC);
8811 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
8812 .addImm(NumLPads & 0xFFFF)
8813 .add(predOps(ARMCC::AL));
8814
8815 unsigned VReg2 = VReg1;
8816 if ((NumLPads & 0xFFFF0000) != 0) {
8817 VReg2 = MRI->createVirtualRegister(TRC);
8818 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
8819 .addReg(VReg1)
8820 .addImm(NumLPads >> 16)
8821 .add(predOps(ARMCC::AL));
8822 }
8823
8824 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
8825 .addReg(NewVReg1)
8826 .addReg(VReg2)
8827 .add(predOps(ARMCC::AL));
8828 }
8829
8830 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
8831 .addMBB(TrapBB)
8832 .addImm(ARMCC::HI)
8833 .addReg(ARM::CPSR);
8834
8835 unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
8836 BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
8837 .addJumpTableIndex(MJTI)
8838 .add(predOps(ARMCC::AL));
8839
8840 unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
8841 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
8842 .addReg(NewVReg3, RegState::Kill)
8843 .addReg(NewVReg1)
8844 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
8845 .add(predOps(ARMCC::AL))
8846 .add(condCodeOp());
8847
8848 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
8849 .addReg(NewVReg4, RegState::Kill)
8850 .addReg(NewVReg1)
8851 .addJumpTableIndex(MJTI);
8852 } else if (Subtarget->isThumb()) {
8853 unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
8854 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
8855 .addFrameIndex(FI)
8856 .addImm(1)
8857 .addMemOperand(FIMMOLd)
8858 .add(predOps(ARMCC::AL));
8859
8860 if (NumLPads < 256) {
8861 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
8862 .addReg(NewVReg1)
8863 .addImm(NumLPads)
8864 .add(predOps(ARMCC::AL));
8865 } else {
8866 MachineConstantPool *ConstantPool = MF->getConstantPool();
8867 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
8868 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
8869
8870 // MachineConstantPool wants an explicit alignment.
8871 unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
8872 if (Align == 0)
8873 Align = MF->getDataLayout().getTypeAllocSize(C->getType());
8874 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
8875
8876 unsigned VReg1 = MRI->createVirtualRegister(TRC);
8877 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
8878 .addReg(VReg1, RegState::Define)
8879 .addConstantPoolIndex(Idx)
8880 .add(predOps(ARMCC::AL));
8881 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
8882 .addReg(NewVReg1)
8883 .addReg(VReg1)
8884 .add(predOps(ARMCC::AL));
8885 }
8886
8887 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
8888 .addMBB(TrapBB)
8889 .addImm(ARMCC::HI)
8890 .addReg(ARM::CPSR);
8891
8892 unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
8893 BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
8894 .addReg(ARM::CPSR, RegState::Define)
8895 .addReg(NewVReg1)
8896 .addImm(2)
8897 .add(predOps(ARMCC::AL));
8898
8899 unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
8900 BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
8901 .addJumpTableIndex(MJTI)
8902 .add(predOps(ARMCC::AL));
8903
8904 unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
8905 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
8906 .addReg(ARM::CPSR, RegState::Define)
8907 .addReg(NewVReg2, RegState::Kill)
8908 .addReg(NewVReg3)
8909 .add(predOps(ARMCC::AL));
8910
8911 MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
8912 MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
8913
8914 unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
8915 BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
8916 .addReg(NewVReg4, RegState::Kill)
8917 .addImm(0)
8918 .addMemOperand(JTMMOLd)
8919 .add(predOps(ARMCC::AL));
8920
8921 unsigned NewVReg6 = NewVReg5;
8922 if (IsPositionIndependent) {
8923 NewVReg6 = MRI->createVirtualRegister(TRC);
8924 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
8925 .addReg(ARM::CPSR, RegState::Define)
8926 .addReg(NewVReg5, RegState::Kill)
8927 .addReg(NewVReg3)
8928 .add(predOps(ARMCC::AL));
8929 }
8930
8931 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
8932 .addReg(NewVReg6, RegState::Kill)
8933 .addJumpTableIndex(MJTI);
8934 } else {
8935 unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
8936 BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
8937 .addFrameIndex(FI)
8938 .addImm(4)
8939 .addMemOperand(FIMMOLd)
8940 .add(predOps(ARMCC::AL));
8941
8942 if (NumLPads < 256) {
8943 BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
8944 .addReg(NewVReg1)
8945 .addImm(NumLPads)
8946 .add(predOps(ARMCC::AL));
8947 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
8948 unsigned VReg1 = MRI->createVirtualRegister(TRC);
8949 BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
8950 .addImm(NumLPads & 0xFFFF)
8951 .add(predOps(ARMCC::AL));
8952
8953 unsigned VReg2 = VReg1;
8954 if ((NumLPads & 0xFFFF0000) != 0) {
8955 VReg2 = MRI->createVirtualRegister(TRC);
8956 BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
8957 .addReg(VReg1)
8958 .addImm(NumLPads >> 16)
8959 .add(predOps(ARMCC::AL));
8960 }
8961
8962 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
8963 .addReg(NewVReg1)
8964 .addReg(VReg2)
8965 .add(predOps(ARMCC::AL));
8966 } else {
8967 MachineConstantPool *ConstantPool = MF->getConstantPool();
8968 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
8969 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
8970
8971 // MachineConstantPool wants an explicit alignment.
8972 unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
8973 if (Align == 0)
8974 Align = MF->getDataLayout().getTypeAllocSize(C->getType());
8975 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
8976
8977 unsigned VReg1 = MRI->createVirtualRegister(TRC);
8978 BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
8979 .addReg(VReg1, RegState::Define)
8980 .addConstantPoolIndex(Idx)
8981 .addImm(0)
8982 .add(predOps(ARMCC::AL));
8983 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
8984 .addReg(NewVReg1)
8985 .addReg(VReg1, RegState::Kill)
8986 .add(predOps(ARMCC::AL));
8987 }
8988
8989 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
8990 .addMBB(TrapBB)
8991 .addImm(ARMCC::HI)
8992 .addReg(ARM::CPSR);
8993
8994 unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
8995 BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
8996 .addReg(NewVReg1)
8997 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
8998 .add(predOps(ARMCC::AL))
8999 .add(condCodeOp());
9000 unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
9001 BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
9002 .addJumpTableIndex(MJTI)
9003 .add(predOps(ARMCC::AL));
9004
9005 MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
9006 MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
9007 unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
9008 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
9009 .addReg(NewVReg3, RegState::Kill)
9010 .addReg(NewVReg4)
9011 .addImm(0)
9012 .addMemOperand(JTMMOLd)
9013 .add(predOps(ARMCC::AL));
9014
9015 if (IsPositionIndependent) {
9016 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
9017 .addReg(NewVReg5, RegState::Kill)
9018 .addReg(NewVReg4)
9019 .addJumpTableIndex(MJTI);
9020 } else {
9021 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
9022 .addReg(NewVReg5, RegState::Kill)
9023 .addJumpTableIndex(MJTI);
9024 }
9025 }
9026
9027 // Add the jump table entries as successors to the MBB.
9028 SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
9029 for (std::vector<MachineBasicBlock*>::iterator
9030 I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
9031 MachineBasicBlock *CurMBB = *I;
9032 if (SeenMBBs.insert(CurMBB).second)
9033 DispContBB->addSuccessor(CurMBB);
9034 }
9035
9036 // N.B. the order the invoke BBs are processed in doesn't matter here.
9037 const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
9038 SmallVector<MachineBasicBlock*, 64> MBBLPads;
9039 for (MachineBasicBlock *BB : InvokeBBs) {
9040
9041 // Remove the landing pad successor from the invoke block and replace it
9042 // with the new dispatch block.
9043 SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
9044 BB->succ_end());
9045 while (!Successors.empty()) {
9046 MachineBasicBlock *SMBB = Successors.pop_back_val();
9047 if (SMBB->isEHPad()) {
9048 BB->removeSuccessor(SMBB);
9049 MBBLPads.push_back(SMBB);
9050 }
9051 }
9052
9053 BB->addSuccessor(DispatchBB, BranchProbability::getZero());
9054 BB->normalizeSuccProbs();
9055
9056 // Find the invoke call and mark all of the callee-saved registers as
9057 // 'implicit defined' so that they're spilled. This prevents code from
9058 // moving instructions to before the EH block, where they will never be
9059 // executed.
9060 for (MachineBasicBlock::reverse_iterator
9061 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
9062 if (!II->isCall()) continue;
9063
9064 DenseMap<unsigned, bool> DefRegs;
9065 for (MachineInstr::mop_iterator
9066 OI = II->operands_begin(), OE = II->operands_end();
9067 OI != OE; ++OI) {
9068 if (!OI->isReg()) continue;
9069 DefRegs[OI->getReg()] = true;
9070 }
9071
9072 MachineInstrBuilder MIB(*MF, &*II);
9073
9074 for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
9075 unsigned Reg = SavedRegs[i];
9076 if (Subtarget->isThumb2() &&
9077 !ARM::tGPRRegClass.contains(Reg) &&
9078 !ARM::hGPRRegClass.contains(Reg))
9079 continue;
9080 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
9081 continue;
9082 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
9083 continue;
9084 if (!DefRegs[Reg])
9085 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
9086 }
9087
9088 break;
9089 }
9090 }
9091
9092 // Mark all former landing pads as non-landing pads. The dispatch is the only
9093 // landing pad now.
9094 for (SmallVectorImpl<MachineBasicBlock*>::iterator
9095 I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
9096 (*I)->setIsEHPad(false);
9097
9098 // The instruction is gone now.
9099 MI.eraseFromParent();
9100}
9101
9102static
9103MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
9104 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
9105 E = MBB->succ_end(); I != E; ++I)
9106 if (*I != Succ)
9107 return *I;
9108 llvm_unreachable("Expecting a BB with two successors!");
9109}
9110
9111/// Return the load opcode for a given load size. If load size >= 8,
9112/// neon opcode will be returned.
9113static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
9114 if (LdSize >= 8)
9115 return LdSize == 16 ? ARM::VLD1q32wb_fixed
9116 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
9117 if (IsThumb1)
9118 return LdSize == 4 ? ARM::tLDRi
9119 : LdSize == 2 ? ARM::tLDRHi
9120 : LdSize == 1 ? ARM::tLDRBi : 0;
9121 if (IsThumb2)
9122 return LdSize == 4 ? ARM::t2LDR_POST
9123 : LdSize == 2 ? ARM::t2LDRH_POST
9124 : LdSize == 1 ? ARM::t2LDRB_POST : 0;
9125 return LdSize == 4 ? ARM::LDR_POST_IMM
9126 : LdSize == 2 ? ARM::LDRH_POST
9127 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
9128}
9129
9130/// Return the store opcode for a given store size. If store size >= 8,
9131/// neon opcode will be returned.
9132static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
9133 if (StSize >= 8)
9134 return StSize == 16 ? ARM::VST1q32wb_fixed
9135 : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
9136 if (IsThumb1)
9137 return StSize == 4 ? ARM::tSTRi
9138 : StSize == 2 ? ARM::tSTRHi
9139 : StSize == 1 ? ARM::tSTRBi : 0;
9140 if (IsThumb2)
9141 return StSize == 4 ? ARM::t2STR_POST
9142 : StSize == 2 ? ARM::t2STRH_POST
9143 : StSize == 1 ? ARM::t2STRB_POST : 0;
9144 return StSize == 4 ? ARM::STR_POST_IMM
9145 : StSize == 2 ? ARM::STRH_POST
9146 : StSize == 1 ? ARM::STRB_POST_IMM : 0;
9147}
9148
9149/// Emit a post-increment load operation with given size. The instructions
9150/// will be added to BB at Pos.
9151static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
9152 const TargetInstrInfo *TII, const DebugLoc &dl,
9153 unsigned LdSize, unsigned Data, unsigned AddrIn,
9154 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
9155 unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
9156 assert(LdOpc != 0 && "Should have a load opcode");
9157 if (LdSize >= 8) {
9158 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
9159 .addReg(AddrOut, RegState::Define)
9160 .addReg(AddrIn)
9161 .addImm(0)
9162 .add(predOps(ARMCC::AL));
9163 } else if (IsThumb1) {
9164 // load + update AddrIn
9165 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
9166 .addReg(AddrIn)
9167 .addImm(0)
9168 .add(predOps(ARMCC::AL));
9169 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
9170 .add(t1CondCodeOp())
9171 .addReg(AddrIn)
9172 .addImm(LdSize)
9173 .add(predOps(ARMCC::AL));
9174 } else if (IsThumb2) {
9175 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
9176 .addReg(AddrOut, RegState::Define)
9177 .addReg(AddrIn)
9178 .addImm(LdSize)
9179 .add(predOps(ARMCC::AL));
9180 } else { // arm
9181 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
9182 .addReg(AddrOut, RegState::Define)
9183 .addReg(AddrIn)
9184 .addReg(0)
9185 .addImm(LdSize)
9186 .add(predOps(ARMCC::AL));
9187 }
9188}
9189
9190/// Emit a post-increment store operation with given size. The instructions
9191/// will be added to BB at Pos.
9192static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
9193 const TargetInstrInfo *TII, const DebugLoc &dl,
9194 unsigned StSize, unsigned Data, unsigned AddrIn,
9195 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
9196 unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
9197 assert(StOpc != 0 && "Should have a store opcode");
9198 if (StSize >= 8) {
9199 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
9200 .addReg(AddrIn)
9201 .addImm(0)
9202 .addReg(Data)
9203 .add(predOps(ARMCC::AL));
9204 } else if (IsThumb1) {
9205 // store + update AddrIn
9206 BuildMI(*BB, Pos, dl, TII->get(StOpc))
9207 .addReg(Data)
9208 .addReg(AddrIn)
9209 .addImm(0)
9210 .add(predOps(ARMCC::AL));
9211 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
9212 .add(t1CondCodeOp())
9213 .addReg(AddrIn)
9214 .addImm(StSize)
9215 .add(predOps(ARMCC::AL));
9216 } else if (IsThumb2) {
9217 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
9218 .addReg(Data)
9219 .addReg(AddrIn)
9220 .addImm(StSize)
9221 .add(predOps(ARMCC::AL));
9222 } else { // arm
9223 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
9224 .addReg(Data)
9225 .addReg(AddrIn)
9226 .addReg(0)
9227 .addImm(StSize)
9228 .add(predOps(ARMCC::AL));
9229 }
9230}
9231
9232MachineBasicBlock *
9233ARMTargetLowering::EmitStructByval(MachineInstr &MI,
9234 MachineBasicBlock *BB) const {
9235 // This pseudo instruction has 3 operands: dst, src, size
9236 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
9237 // Otherwise, we will generate unrolled scalar copies.
9238 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
9239 const BasicBlock *LLVM_BB = BB->getBasicBlock();
9240 MachineFunction::iterator It = ++BB->getIterator();
9241
9242 unsigned dest = MI.getOperand(0).getReg();
9243 unsigned src = MI.getOperand(1).getReg();
9244 unsigned SizeVal = MI.getOperand(2).getImm();
9245 unsigned Align = MI.getOperand(3).getImm();
9246 DebugLoc dl = MI.getDebugLoc();
9247
9248 MachineFunction *MF = BB->getParent();
9249 MachineRegisterInfo &MRI = MF->getRegInfo();
9250 unsigned UnitSize = 0;
9251 const TargetRegisterClass *TRC = nullptr;
9252 const TargetRegisterClass *VecTRC = nullptr;
9253
9254 bool IsThumb1 = Subtarget->isThumb1Only();
9255 bool IsThumb2 = Subtarget->isThumb2();
9256 bool IsThumb = Subtarget->isThumb();
9257
9258 if (Align & 1) {
9259 UnitSize = 1;
9260 } else if (Align & 2) {
9261 UnitSize = 2;
9262 } else {
9263 // Check whether we can use NEON instructions.
9264 if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
9265 Subtarget->hasNEON()) {
9266 if ((Align % 16 == 0) && SizeVal >= 16)
9267 UnitSize = 16;
9268 else if ((Align % 8 == 0) && SizeVal >= 8)
9269 UnitSize = 8;
9270 }
9271 // Can't use NEON instructions.
9272 if (UnitSize == 0)
9273 UnitSize = 4;
9274 }
9275
9276 // Select the correct opcode and register class for unit size load/store
9277 bool IsNeon = UnitSize >= 8;
9278 TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
9279 if (IsNeon)
9280 VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
9281 : UnitSize == 8 ? &ARM::DPRRegClass
9282 : nullptr;
9283
9284 unsigned BytesLeft = SizeVal % UnitSize;
9285 unsigned LoopSize = SizeVal - BytesLeft;
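 // e.g. a 35-byte copy with 16-byte alignment and NEON available runs the
 // copy body twice with UnitSize 16 (LoopSize = 32) and then moves the
 // remaining BytesLeft = 3 bytes one at a time.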
9286
9287 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
9288 // Use LDR and STR to copy.
9289 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
9290 // [destOut] = STR_POST(scratch, destIn, UnitSize)
9291 unsigned srcIn = src;
9292 unsigned destIn = dest;
9293 for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
9294 unsigned srcOut = MRI.createVirtualRegister(TRC);
9295 unsigned destOut = MRI.createVirtualRegister(TRC);
9296 unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
9297 emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
9298 IsThumb1, IsThumb2);
9299 emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
9300 IsThumb1, IsThumb2);
9301 srcIn = srcOut;
9302 destIn = destOut;
9303 }
9304
9305 // Handle the leftover bytes with LDRB and STRB.
9306 // [scratch, srcOut] = LDRB_POST(srcIn, 1)
9307 // [destOut] = STRB_POST(scratch, destIn, 1)
9308 for (unsigned i = 0; i < BytesLeft; i++) {
9309 unsigned srcOut = MRI.createVirtualRegister(TRC);
9310 unsigned destOut = MRI.createVirtualRegister(TRC);
9311 unsigned scratch = MRI.createVirtualRegister(TRC);
9312 emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
9313 IsThumb1, IsThumb2);
9314 emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
9315 IsThumb1, IsThumb2);
9316 srcIn = srcOut;
9317 destIn = destOut;
9318 }
9319 MI.eraseFromParent(); // The instruction is gone now.
9320 return BB;
9321 }
9322
9323 // Expand the pseudo op to a loop.
9324 // thisMBB:
9325 // ...
9326 // movw varEnd, # --> with thumb2
9327 // movt varEnd, #
9328 // ldrcp varEnd, idx --> without thumb2
9329 // fallthrough --> loopMBB
9330 // loopMBB:
9331 // PHI varPhi, varEnd, varLoop
9332 // PHI srcPhi, src, srcLoop
9333 // PHI destPhi, dst, destLoop
9334 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
9335 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
9336 // subs varLoop, varPhi, #UnitSize
9337 // bne loopMBB
9338 // fallthrough --> exitMBB
9339 // exitMBB:
9340 // epilogue to handle left-over bytes
9341 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
9342 // [destOut] = STRB_POST(scratch, destLoop, 1)
9343 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
9344 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
9345 MF->insert(It, loopMBB);
9346 MF->insert(It, exitMBB);
9347
9348 // Transfer the remainder of BB and its successor edges to exitMBB.
9349 exitMBB->splice(exitMBB->begin(), BB,
9350 std::next(MachineBasicBlock::iterator(MI)), BB->end());
9351 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9352
9353 // Load an immediate to varEnd.
9354 unsigned varEnd = MRI.createVirtualRegister(TRC);
9355 if (Subtarget->useMovt()) {
9356 unsigned Vtmp = varEnd;
9357 if ((LoopSize & 0xFFFF0000) != 0)
9358 Vtmp = MRI.createVirtualRegister(TRC);
9359 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), Vtmp)
9360 .addImm(LoopSize & 0xFFFF)
9361 .add(predOps(ARMCC::AL));
9362
9363 if ((LoopSize & 0xFFFF0000) != 0)
9364 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), varEnd)
9365 .addReg(Vtmp)
9366 .addImm(LoopSize >> 16)
9367 .add(predOps(ARMCC::AL));
9368 } else {
9369 MachineConstantPool *ConstantPool = MF->getConstantPool();
9370 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
9371 const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
9372
9373 // MachineConstantPool wants an explicit alignment.
9374 unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
9375 if (Align == 0)
9376 Align = MF->getDataLayout().getTypeAllocSize(C->getType());
9377 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
9378 MachineMemOperand *CPMMO =
9379 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
9380 MachineMemOperand::MOLoad, 4, 4);
9381
9382 if (IsThumb)
9383 BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
9384 .addReg(varEnd, RegState::Define)
9385 .addConstantPoolIndex(Idx)
9386 .add(predOps(ARMCC::AL))
9387 .addMemOperand(CPMMO);
9388 else
9389 BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
9390 .addReg(varEnd, RegState::Define)
9391 .addConstantPoolIndex(Idx)
9392 .addImm(0)
9393 .add(predOps(ARMCC::AL))
9394 .addMemOperand(CPMMO);
9395 }
9396 BB->addSuccessor(loopMBB);
9397
9398 // Generate the loop body:
9399 // varPhi = PHI(varLoop, varEnd)
9400 // srcPhi = PHI(srcLoop, src)
9401 // destPhi = PHI(destLoop, dst)
9402 MachineBasicBlock *entryBB = BB;
9403 BB = loopMBB;
9404 unsigned varLoop = MRI.createVirtualRegister(TRC);
9405 unsigned varPhi = MRI.createVirtualRegister(TRC);
9406 unsigned srcLoop = MRI.createVirtualRegister(TRC);
9407 unsigned srcPhi = MRI.createVirtualRegister(TRC);
9408 unsigned destLoop = MRI.createVirtualRegister(TRC);
9409 unsigned destPhi = MRI.createVirtualRegister(TRC);
9410
9411 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
9412 .addReg(varLoop).addMBB(loopMBB)
9413 .addReg(varEnd).addMBB(entryBB);
9414 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
9415 .addReg(srcLoop).addMBB(loopMBB)
9416 .addReg(src).addMBB(entryBB);
9417 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
9418 .addReg(destLoop).addMBB(loopMBB)
9419 .addReg(dest).addMBB(entryBB);
9420
9421 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
9422 // [destLoop] = STR_POST(scratch, destPhi, UnitSiz)
9423 unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
9424 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
9425 IsThumb1, IsThumb2);
9426 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
9427 IsThumb1, IsThumb2);
9428
9429 // Decrement loop variable by UnitSize.
9430 if (IsThumb1) {
9431 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
9432 .add(t1CondCodeOp())
9433 .addReg(varPhi)
9434 .addImm(UnitSize)
9435 .add(predOps(ARMCC::AL));
9436 } else {
9437 MachineInstrBuilder MIB =
9438 BuildMI(*BB, BB->end(), dl,
9439 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
9440 MIB.addReg(varPhi)
9441 .addImm(UnitSize)
9442 .add(predOps(ARMCC::AL))
9443 .add(condCodeOp());
9444 MIB->getOperand(5).setReg(ARM::CPSR);
9445 MIB->getOperand(5).setIsDef(true);
9446 }
9447 BuildMI(*BB, BB->end(), dl,
9448 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
9449 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
9450
9451 // loopMBB can loop back to loopMBB or fall through to exitMBB.
9452 BB->addSuccessor(loopMBB);
9453 BB->addSuccessor(exitMBB);
9454
9455 // Add epilogue to handle BytesLeft.
9456 BB = exitMBB;
9457 auto StartOfExit = exitMBB->begin();
9458
9459 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
9460 // [destOut] = STRB_POST(scratch, destLoop, 1)
9461 unsigned srcIn = srcLoop;
9462 unsigned destIn = destLoop;
9463 for (unsigned i = 0; i < BytesLeft; i++) {
9464 unsigned srcOut = MRI.createVirtualRegister(TRC);
9465 unsigned destOut = MRI.createVirtualRegister(TRC);
9466 unsigned scratch = MRI.createVirtualRegister(TRC);
9467 emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
9468 IsThumb1, IsThumb2);
9469 emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
9470 IsThumb1, IsThumb2);
9471 srcIn = srcOut;
9472 destIn = destOut;
9473 }
9474
9475 MI.eraseFromParent(); // The instruction is gone now.
9476 return BB;
9477}
9478
9479MachineBasicBlock *
9480ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
9481 MachineBasicBlock *MBB) const {
9482 const TargetMachine &TM = getTargetMachine();
9483 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
9484 DebugLoc DL = MI.getDebugLoc();
9485
9486 assert(Subtarget->isTargetWindows() &&
9487 "__chkstk is only supported on Windows");
9488 assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
9489
9490 // __chkstk takes the number of words to allocate on the stack in R4, and
9491 // returns the stack adjustment in number of bytes in R4. This will not
9492 // clobber any other registers (other than the obvious lr).
9493 //
9494 // Although, technically, IP should be considered a register which may be
9495 // clobbered, the call itself will not touch it. Windows on ARM is a pure
9496 // thumb-2 environment, so there is no interworking required. As a result, we
9497 // do not expect a veneer to be emitted by the linker, clobbering IP.
9498 //
9499 // Each module receives its own copy of __chkstk, so no import thunk is
9500 // required, again, ensuring that IP is not clobbered.
9501 //
9502 // Finally, although some linkers may theoretically provide a trampoline for
9503 // out of range calls (which is quite common due to a 32M range limitation of
9504 // branches for Thumb), we can generate the long-call version via
9505 // -mcmodel=large, alleviating the need for the trampoline which may clobber
9506 // IP.
9507
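 // The sequence emitted below therefore reduces to (a sketch):
 //   bl    __chkstk        ; words to probe passed in r4
 //   sub.w sp, sp, r4      ; r4 now holds the adjustment in bytes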
9508 switch (TM.getCodeModel()) {
9509 case CodeModel::Tiny:
9510 llvm_unreachable("Tiny code model not available on ARM.");
9511 case CodeModel::Small:
9512 case CodeModel::Medium:
9513 case CodeModel::Kernel:
9514 BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
9516 .addExternalSymbol("__chkstk")
9517 .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
9518 .addReg(ARM::R4, RegState::Implicit | RegState::Define)
9519 .addReg(ARM::R12,
9520 RegState::Implicit | RegState::Define | RegState::Dead)
9521 .addReg(ARM::CPSR,
9522 RegState::Implicit | RegState::Define | RegState::Dead);
9523 break;
9524 case CodeModel::Large: {
9525 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
9526 unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
9527
9528 BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
9529 .addExternalSymbol("__chkstk");
9530 BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
9531 .add(predOps(ARMCC::AL))
9532 .addReg(Reg, RegState::Kill)
9533 .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
9534 .addReg(ARM::R4, RegState::Implicit | RegState::Define)
9535 .addReg(ARM::R12,
9536 RegState::Implicit | RegState::Define | RegState::Dead)
9537 .addReg(ARM::CPSR,
9538 RegState::Implicit | RegState::Define | RegState::Dead);
9539 break;
9540 }
9541 }
9542
9543 BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
9544 .addReg(ARM::SP, RegState::Kill)
9545 .addReg(ARM::R4, RegState::Kill)
9546 .setMIFlags(MachineInstr::FrameSetup)
9547 .add(predOps(ARMCC::AL))
9548 .add(condCodeOp());
9549
9550 MI.eraseFromParent();
9551 return MBB;
9552}
9553
9554MachineBasicBlock *
9555ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
9556 MachineBasicBlock *MBB) const {
9557 DebugLoc DL = MI.getDebugLoc();
9558 MachineFunction *MF = MBB->getParent();
9559 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
9560
9561 MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
9562 MF->insert(++MBB->getIterator(), ContBB);
9563 ContBB->splice(ContBB->begin(), MBB,
9564 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
9565 ContBB->transferSuccessorsAndUpdatePHIs(MBB);
9566 MBB->addSuccessor(ContBB);
9567
9568 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
9569 BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
9570 MF->push_back(TrapBB);
9571 MBB->addSuccessor(TrapBB);
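 // The guard emitted below is simply:
 //   cmp  <denominator>, #0
 //   beq  TrapBB           ; TrapBB executes __brkdiv0
 // with execution falling through to the division call otherwise.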
9572
9573 BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
9574 .addReg(MI.getOperand(0).getReg())
9575 .addImm(0)
9576 .add(predOps(ARMCC::AL));
9577 BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
9578 .addMBB(TrapBB)
9579 .addImm(ARMCC::EQ)
9580 .addReg(ARM::CPSR);
9581
9582 MI.eraseFromParent();
9583 return ContBB;
9584}
9585
9586// The CPSR operand of SelectItr might be missing a kill marker
9587// because there were multiple uses of CPSR, and ISel didn't know
9588// which to mark. Figure out whether SelectItr should have had a
9589// kill marker, and set it if it should. Returns the correct kill
9590// marker value.
9591static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
9592 MachineBasicBlock* BB,
9593 const TargetRegisterInfo* TRI) {
9594 // Scan forward through BB for a use/def of CPSR.
9595 MachineBasicBlock::iterator miI(std::next(SelectItr));
9596 for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
9597 const MachineInstr& mi = *miI;
9598 if (mi.readsRegister(ARM::CPSR))
9599 return false;
9600 if (mi.definesRegister(ARM::CPSR))
9601 break; // Should have kill-flag - update below.
9602 }
9603
9604 // If we hit the end of the block, check whether CPSR is live into a
9605 // successor.
9606 if (miI == BB->end()) {
9607 for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
9608 sEnd = BB->succ_end();
9609 sItr != sEnd; ++sItr) {
9610 MachineBasicBlock* succ = *sItr;
9611 if (succ->isLiveIn(ARM::CPSR))
9612 return false;
9613 }
9614 }
9615
9616 // We found a def, or hit the end of the basic block and CPSR wasn't live
9617 // out. SelectMI should have a kill flag on CPSR.
9618 SelectItr->addRegisterKilled(ARM::CPSR, TRI);
9619 return true;
9620}
9621
9622MachineBasicBlock *
9623ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
9624 MachineBasicBlock *BB) const {
9625 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
9626 DebugLoc dl = MI.getDebugLoc();
9627 bool isThumb2 = Subtarget->isThumb2();
9628 switch (MI.getOpcode()) {
9629 default: {
9630 MI.print(errs());
9631 llvm_unreachable("Unexpected instr type to insert");
9632 }
9633
9634 // Thumb1 post-indexed loads are really just single-register LDMs.
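 // e.g. "ldr r0, [r1], #4" is emitted as "ldm r1!, {r0}", which loads and
 // then writes the incremented base back in one 16-bit instruction.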
9635 case ARM::tLDR_postidx: {
9636 MachineOperand Def(MI.getOperand(1));
9637 BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
9638 .add(Def) // Rn_wb
9639 .add(MI.getOperand(2)) // Rn
9640 .add(MI.getOperand(3)) // PredImm
9641 .add(MI.getOperand(4)) // PredReg
9642 .add(MI.getOperand(0)) // Rt
9643 .cloneMemRefs(MI);
9644 MI.eraseFromParent();
9645 return BB;
9646 }
9647
9648 // The Thumb2 pre-indexed stores have the same MI operands, they just
9649 // define them differently in the .td files from the isel patterns, so
9650 // they need pseudos.
9651 case ARM::t2STR_preidx:
9652 MI.setDesc(TII->get(ARM::t2STR_PRE));
9653 return BB;
9654 case ARM::t2STRB_preidx:
9655 MI.setDesc(TII->get(ARM::t2STRB_PRE));
9656 return BB;
9657 case ARM::t2STRH_preidx:
9658 MI.setDesc(TII->get(ARM::t2STRH_PRE));
9659 return BB;
9660
9661 case ARM::STRi_preidx:
9662 case ARM::STRBi_preidx: {
9663 unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
9664 : ARM::STRB_PRE_IMM;
9665 // Decode the offset.
9666 unsigned Offset = MI.getOperand(4).getImm();
9667 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
9668 Offset = ARM_AM::getAM2Offset(Offset);
9669 if (isSub)
9670 Offset = -Offset;
9671
9672 MachineMemOperand *MMO = *MI.memoperands_begin();
9673 BuildMI(*BB, MI, dl, TII->get(NewOpc))
9674 .add(MI.getOperand(0)) // Rn_wb
9675 .add(MI.getOperand(1)) // Rt
9676 .add(MI.getOperand(2)) // Rn
9677 .addImm(Offset) // offset (skip GPR==zero_reg)
9678 .add(MI.getOperand(5)) // pred
9679 .add(MI.getOperand(6))
9680 .addMemOperand(MMO);
9681 MI.eraseFromParent();
9682 return BB;
9683 }
9684 case ARM::STRr_preidx:
9685 case ARM::STRBr_preidx:
9686 case ARM::STRH_preidx: {
9687 unsigned NewOpc;
9688 switch (MI.getOpcode()) {
9689 default: llvm_unreachable("unexpected opcode!");
9690 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
9691 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
9692 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
9693 }
9694 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
9695 for (unsigned i = 0; i < MI.getNumOperands(); ++i)
9696 MIB.add(MI.getOperand(i));
9697 MI.eraseFromParent();
9698 return BB;
9699 }
9700
9701 case ARM::tMOVCCr_pseudo: {
9702 // To "insert" a SELECT_CC instruction, we actually have to insert the
9703 // diamond control-flow pattern. The incoming instruction knows the
9704 // destination vreg to set, the condition code register to branch on, the
9705 // true/false values to select between, and a branch opcode to use.
9706 const BasicBlock *LLVM_BB = BB->getBasicBlock();
9707 MachineFunction::iterator It = ++BB->getIterator();
9708
9709 // thisMBB:
9710 // ...
9711 // TrueVal = ...
9712 // cmpTY ccX, r1, r2
9713 // bCC copy1MBB
9714 // fallthrough --> copy0MBB
9715 MachineBasicBlock *thisMBB = BB;
9716 MachineFunction *F = BB->getParent();
9717 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
9718 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9719 F->insert(It, copy0MBB);
9720 F->insert(It, sinkMBB);
9721
9722 // Check whether CPSR is live past the tMOVCCr_pseudo.
9723 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
9724 if (!MI.killsRegister(ARM::CPSR) &&
9725 !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
9726 copy0MBB->addLiveIn(ARM::CPSR);
9727 sinkMBB->addLiveIn(ARM::CPSR);
9728 }
9729
9730 // Transfer the remainder of BB and its successor edges to sinkMBB.
9731 sinkMBB->splice(sinkMBB->begin(), BB,
9732 std::next(MachineBasicBlock::iterator(MI)), BB->end());
9733 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9734
9735 BB->addSuccessor(copy0MBB);
9736 BB->addSuccessor(sinkMBB);
9737
9738 BuildMI(BB, dl, TII->get(ARM::tBcc))
9739 .addMBB(sinkMBB)
9740 .addImm(MI.getOperand(3).getImm())
9741 .addReg(MI.getOperand(4).getReg());
9742
9743 // copy0MBB:
9744 // %FalseValue = ...
9745 // # fallthrough to sinkMBB
9746 BB = copy0MBB;
9747
9748 // Update machine-CFG edges
9749 BB->addSuccessor(sinkMBB);
9750
9751 // sinkMBB:
9752 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
9753 // ...
9754 BB = sinkMBB;
9755 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
9756 .addReg(MI.getOperand(1).getReg())
9757 .addMBB(copy0MBB)
9758 .addReg(MI.getOperand(2).getReg())
9759 .addMBB(thisMBB);
9760
9761 MI.eraseFromParent(); // The pseudo instruction is gone now.
9762 return BB;
9763 }
9764
9765 case ARM::BCCi64:
9766 case ARM::BCCZi64: {
9767 // If there is an unconditional branch to the other successor, remove it.
9768 BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
9769
9770 // Compare both parts that make up the double comparison separately for
9771 // equality.
9772 bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
9773
9774 unsigned LHS1 = MI.getOperand(1).getReg();
9775 unsigned LHS2 = MI.getOperand(2).getReg();
9776 if (RHSisZero) {
9777 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
9778 .addReg(LHS1)
9779 .addImm(0)
9780 .add(predOps(ARMCC::AL));
9781 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
9782 .addReg(LHS2).addImm(0)
9783 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
9784 } else {
9785 unsigned RHS1 = MI.getOperand(3).getReg();
9786 unsigned RHS2 = MI.getOperand(4).getReg();
9787 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
9788 .addReg(LHS1)
9789 .addReg(RHS1)
9790 .add(predOps(ARMCC::AL));
9791 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
9792 .addReg(LHS2).addReg(RHS2)
9793 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
9794 }
9795
9796 MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
9797 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
9798 if (MI.getOperand(0).getImm() == ARMCC::NE)
9799 std::swap(destMBB, exitMBB);
9800
9801 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
9802 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
9803 if (isThumb2)
9804 BuildMI(BB, dl, TII->get(ARM::t2B))
9805 .addMBB(exitMBB)
9806 .add(predOps(ARMCC::AL));
9807 else
9808 BuildMI(BB, dl, TII->get(ARM::B)).addMBB(exitMBB);
9809
9810 MI.eraseFromParent(); // The pseudo instruction is gone now.
9811 return BB;
9812 }
9813
9814 case ARM::Int_eh_sjlj_setjmp:
9815 case ARM::Int_eh_sjlj_setjmp_nofp:
9816 case ARM::tInt_eh_sjlj_setjmp:
9817 case ARM::t2Int_eh_sjlj_setjmp:
9818 case ARM::t2Int_eh_sjlj_setjmp_nofp:
9819 return BB;
9820
9821 case ARM::Int_eh_sjlj_setup_dispatch:
9822 EmitSjLjDispatchBlock(MI, BB);
9823 return BB;
9824
9825 case ARM::ABS:
9826 case ARM::t2ABS: {
9827 // To insert an ABS instruction, we have to insert the
9828 // diamond control-flow pattern. The incoming instruction knows the
9829 // source vreg to test against 0, the destination vreg to set,
9830 // the condition code register to branch on, the
9831 // true/false values to select between, and a branch opcode to use.
9832 // It transforms
9833 // V1 = ABS V0
9834 // into
9835 // V2 = MOVS V0
9836 // BCC (branch to SinkBB if V0 >= 0)
9837 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
9838 // SinkBB: V1 = PHI(V2, V3)
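 // After if-conversion this typically becomes a compare plus a predicated
 // reverse-subtract, e.g.:
 //   cmp   r0, #0
 //   it    mi
 //   rsbmi r0, r0, #0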
9839 const BasicBlock *LLVM_BB = BB->getBasicBlock();
9840 MachineFunction::iterator BBI = ++BB->getIterator();
9841 MachineFunction *Fn = BB->getParent();
9842 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
9843 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
9844 Fn->insert(BBI, RSBBB);
9845 Fn->insert(BBI, SinkBB);
9846
9847 unsigned int ABSSrcReg = MI.getOperand(1).getReg();
9848 unsigned int ABSDstReg = MI.getOperand(0).getReg();
9849 bool ABSSrcKill = MI.getOperand(1).isKill();
9850 bool isThumb2 = Subtarget->isThumb2();
9851 MachineRegisterInfo &MRI = Fn->getRegInfo();
9852 // In Thumb mode S must not be specified if source register is the SP or
9853 // PC and if destination register is the SP, so restrict register class
9854 unsigned NewRsbDstReg =
9855 MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
9856
9857 // Transfer the remainder of BB and its successor edges to sinkMBB.
9858 SinkBB->splice(SinkBB->begin(), BB,
9859 std::next(MachineBasicBlock::iterator(MI)), BB->end());
9860 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
9861
9862 BB->addSuccessor(RSBBB);
9863 BB->addSuccessor(SinkBB);
9864
9865 // fall through to SinkMBB
9866 RSBBB->addSuccessor(SinkBB);
9867
9868 // insert a cmp at the end of BB
9869 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
9870 .addReg(ABSSrcReg)
9871 .addImm(0)
9872 .add(predOps(ARMCC::AL));
9873
9874 // insert a bcc with opposite CC to ARMCC::MI at the end of BB
9875 BuildMI(BB, dl,
9876 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
9877 .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
9878
9879 // insert rsbri in RSBBB
9880 // Note: BCC and rsbri will be converted into predicated rsbmi
9881 // by the if-conversion pass.
9882 BuildMI(*RSBBB, RSBBB->begin(), dl,
9883 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
9884 .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
9885 .addImm(0)
9886 .add(predOps(ARMCC::AL))
9887 .add(condCodeOp());
9888
9889 // insert PHI in SinkBB,
9890 // reuse ABSDstReg to not change uses of ABS instruction
9891 BuildMI(*SinkBB, SinkBB->begin(), dl,
9892 TII->get(ARM::PHI), ABSDstReg)
9893 .addReg(NewRsbDstReg).addMBB(RSBBB)
9894 .addReg(ABSSrcReg).addMBB(BB);
9895
9896 // remove ABS instruction
9897 MI.eraseFromParent();
9898
9899 // return last added BB
9900 return SinkBB;
9901 }
9902 case ARM::COPY_STRUCT_BYVAL_I32:
9903 ++NumLoopByVals;
9904 return EmitStructByval(MI, BB);
9905 case ARM::WIN__CHKSTK:
9906 return EmitLowered__chkstk(MI, BB);
9907 case ARM::WIN__DBZCHK:
9908 return EmitLowered__dbzchk(MI, BB);
9909 }
9910}
9911
9912/// Attaches vregs to MEMCPY that it will use as scratch registers
9913/// when it is expanded into LDM/STM. This is done as a post-isel lowering
9914/// instead of as a custom inserter because we need the use list from the SDNode.
9915static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
9916 MachineInstr &MI, const SDNode *Node) {
9917 bool isThumb1 = Subtarget->isThumb1Only();
9918
9919 DebugLoc DL = MI.getDebugLoc();
9920 MachineFunction *MF = MI.getParent()->getParent();
9921 MachineRegisterInfo &MRI = MF->getRegInfo();
9922 MachineInstrBuilder MIB(*MF, MI);
9923
9924 // If the new dst/src is unused, mark it as dead.
9925 if (!Node->hasAnyUseOfValue(0)) {
9926 MI.getOperand(0).setIsDead(true);
9927 }
9928 if (!Node->hasAnyUseOfValue(1)) {
9929 MI.getOperand(1).setIsDead(true);
9930 }
9931
9932 // The MEMCPY both defines and kills the scratch registers.
9933 for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
9934 unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
9935 : &ARM::GPRRegClass);
9936 MIB.addReg(TmpReg, RegState::Define | RegState::Dead);
9937 }
9938}
9939
9940 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
9941 SDNode *Node) const {
9942 if (MI.getOpcode() == ARM::MEMCPY) {
9943 attachMEMCPYScratchRegs(Subtarget, MI, Node);
9944 return;
9945 }
9946
9947 const MCInstrDesc *MCID = &MI.getDesc();
9948 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
9949 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
9950 // operand is still set to noreg. If needed, set the optional operand's
9951 // register to CPSR, and remove the redundant implicit def.
9952 //
9953 // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
9954
9955 // Rename pseudo opcodes.
9956 unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
9957 unsigned ccOutIdx;
9958 if (NewOpc) {
9959 const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
9960 MCID = &TII->get(NewOpc);
9961
9962 assert(MCID->getNumOperands() ==
9963 MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
9964 && "converted opcode should be the same except for cc_out"
9965 " (and, on Thumb1, pred)");
9966
9967 MI.setDesc(*MCID);
9968
9969 // Add the optional cc_out operand
9970 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
9971
9972 // On Thumb1, move all input operands to the end, then add the predicate
9973 if (Subtarget->isThumb1Only()) {
9974 for (unsigned c = MCID->getNumOperands() - 4; c--;) {
9975 MI.addOperand(MI.getOperand(1));
9976 MI.RemoveOperand(1);
9977 }
9978
9979 // Restore the ties
9980 for (unsigned i = MI.getNumOperands(); i--;) {
9981 const MachineOperand& op = MI.getOperand(i);
9982 if (op.isReg() && op.isUse()) {
9983 int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
9984 if (DefIdx != -1)
9985 MI.tieOperands(DefIdx, i);
9986 }
9987 }
9988 }
9989 MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));
9990 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
9991 ccOutIdx = 1;
9992 } else
9993 ccOutIdx = MCID->getNumOperands() - 1;
9994 } else
9995 ccOutIdx = MCID->getNumOperands() - 1;
9996
9997 // Any ARM instruction that sets the 's' bit should specify an optional
9998 // "cc_out" operand in the last operand position.
9999 if (!MI.hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
10000 assert(!NewOpc && "Optional cc_out operand required");
10001 return;
10002 }
10003 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
10004 // since we already have an optional CPSR def.
10005 bool definesCPSR = false;
10006 bool deadCPSR = false;
10007 for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
10008 ++i) {
10009 const MachineOperand &MO = MI.getOperand(i);
10010 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
10011 definesCPSR = true;
10012 if (MO.isDead())
10013 deadCPSR = true;
10014 MI.RemoveOperand(i);
10015 break;
10016 }
10017 }
10018 if (!definesCPSR) {
10019 assert(!NewOpc && "Optional cc_out operand required");
10020 return;
10021 }
10022 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
10023 if (deadCPSR) {
10024 assert(!MI.getOperand(ccOutIdx).getReg() &&
10025 "expect uninitialized optional cc_out operand");
10026 // Thumb1 instructions must have the S bit even if the CPSR is dead.
10027 if (!Subtarget->isThumb1Only())
10028 return;
10029 }
10030
10031 // If this instruction was defined with an optional CPSR def and its dag node
10032 // had a live implicit CPSR def, then activate the optional CPSR def.
10033 MachineOperand &MO = MI.getOperand(ccOutIdx);
10034 MO.setReg(ARM::CPSR);
10035 MO.setIsDef(true);
10036}
10037
10038//===----------------------------------------------------------------------===//
10039// ARM Optimization Hooks
10040//===----------------------------------------------------------------------===//
10041
10042// Helper function that checks if N is a null or all ones constant.
10043static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
10044 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
10045}
10046
10047// Return true if N is conditionally 0 or all ones.
10048// Detects these expressions where cc is an i1 value:
10049//
10050// (select cc 0, y) [AllOnes=0]
10051// (select cc y, 0) [AllOnes=0]
10052// (zext cc) [AllOnes=0]
10053// (sext cc) [AllOnes=0/1]
10054// (select cc -1, y) [AllOnes=1]
10055// (select cc y, -1) [AllOnes=1]
10056//
10057// Invert is set when N is the null/all ones constant when CC is false.
10058// OtherOp is set to the alternative value of N.
10059static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
10060 SDValue &CC, bool &Invert,
10061 SDValue &OtherOp,
10062 SelectionDAG &DAG) {
10063 switch (N->getOpcode()) {
10064 default: return false;
10065 case ISD::SELECT: {
10066 CC = N->getOperand(0);
10067 SDValue N1 = N->getOperand(1);
10068 SDValue N2 = N->getOperand(2);
10069 if (isZeroOrAllOnes(N1, AllOnes)) {
10070 Invert = false;
10071 OtherOp = N2;
10072 return true;
10073 }
10074 if (isZeroOrAllOnes(N2, AllOnes)) {
10075 Invert = true;
10076 OtherOp = N1;
10077 return true;
10078 }
10079 return false;
10080 }
10081 case ISD::ZERO_EXTEND:
10082 // (zext cc) can never be the all ones value.
10083 if (AllOnes)
10084 return false;
10085 LLVM_FALLTHROUGH;
10086 case ISD::SIGN_EXTEND: {
10087 SDLoc dl(N);
10088 EVT VT = N->getValueType(0);
10089 CC = N->getOperand(0);
10090 if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
10091 return false;
10092 Invert = !AllOnes;
10093 if (AllOnes)
10094 // When looking for an AllOnes constant, N is an sext, and the 'other'
10095 // value is 0.
10096 OtherOp = DAG.getConstant(0, dl, VT);
10097 else if (N->getOpcode() == ISD::ZERO_EXTEND)
10098 // When looking for a 0 constant, N can be zext or sext.
10099 OtherOp = DAG.getConstant(1, dl, VT);
10100 else
10101 OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
10102 VT);
10103 return true;
10104 }
10105 }
10106}
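// Worked example (illustrative, not from the original source): for
// N = (zext cc), where cc is an i1 setcc, with AllOnes == false the
// sext/zext arm above sets CC = cc, Invert = true (N is 0 when cc is
// false) and OtherOp = 1, the value N takes when cc is true.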
10107
10108// Combine a constant select operand into its use:
10109//
10110// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
10111// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
10112// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
10113// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
10114// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
10115//
10116// The transform is rejected if the select doesn't have a constant operand that
10117// is null, or all ones when AllOnes is set.
10118//
10119// Also recognize sext/zext from i1:
10120//
10121// (add (zext cc), x) -> (select cc (add x, 1), x)
10122// (add (sext cc), x) -> (select cc (add x, -1), x)
10123//
10124// These transformations eventually create predicated instructions.
10125//
10126// @param N The node to transform.
10127// @param Slct The N operand that is a select.
10128// @param OtherOp The other N operand (x above).
10129// @param DCI Context.
10130// @param AllOnes Require the select constant to be all ones instead of null.
10131// @returns The new node, or SDValue() on failure.
10132 static
10133 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
10134 TargetLowering::DAGCombinerInfo &DCI,
10135 bool AllOnes = false) {
10136 SelectionDAG &DAG = DCI.DAG;
10137 EVT VT = N->getValueType(0);
10138 SDValue NonConstantVal;
10139 SDValue CCOp;
10140 bool SwapSelectOps;
10141 if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
10142 NonConstantVal, DAG))
10143 return SDValue();
10144
10145 // Slct is now known to be the desired identity constant when CC is true.
10146 SDValue TrueVal = OtherOp;
10147 SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
10148 OtherOp, NonConstantVal);
10149 // Unless SwapSelectOps says CC should be false.
10150 if (SwapSelectOps)
10151 std::swap(TrueVal, FalseVal);
10152
10153 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
10154 CCOp, TrueVal, FalseVal);
10155}
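// Worked example (illustrative; cc and x are placeholders): for
//   N = (add (select cc, 0, 7), x)
// the select provides the identity value 0 when cc is true, so the combine
// yields (select cc, x, (add x, 7)); the add only happens on the false arm.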
10156
10157// Attempt combineSelectAndUse on each operand of a commutative operator N.
10158 static
10159 SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
10160 TargetLowering::DAGCombinerInfo &DCI) {
10161 SDValue N0 = N->getOperand(0);
10162 SDValue N1 = N->getOperand(1);
10163 if (N0.getNode()->hasOneUse())
10164 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
10165 return Result;
10166 if (N1.getNode()->hasOneUse())
10167 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
10168 return Result;
10169 return SDValue();
10170}
10171
10172 static bool IsVUZPShuffleNode(SDNode *N) {
10173 // VUZP shuffle node.
10174 if (N->getOpcode() == ARMISD::VUZP)
10175 return true;
10176
10177 // "VUZP" on i32 is an alias for VTRN.
10178 if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
10179 return true;
10180
10181 return false;
10182}
10183
10184 static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
10185 TargetLowering::DAGCombinerInfo &DCI,
10186 const ARMSubtarget *Subtarget) {
10187 // Look for ADD(VUZP.0, VUZP.1).
10188 if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
10189 N0 == N1)
10190 return SDValue();
10191
10192 // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
10193 if (!N->getValueType(0).is64BitVector())
10194 return SDValue();
10195
10196 // Generate vpadd.
10197 SelectionDAG &DAG = DCI.DAG;
10198 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10199 SDLoc dl(N);
10200 SDNode *Unzip = N0.getNode();
10201 EVT VT = N->getValueType(0);
10202
10203 SmallVector<SDValue, 8> Ops;
10204 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
10205 TLI.getPointerTy(DAG.getDataLayout())));
10206 Ops.push_back(Unzip->getOperand(0));
10207 Ops.push_back(Unzip->getOperand(1));
10208
10209 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
10210}
10211
10212 static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
10213 TargetLowering::DAGCombinerInfo &DCI,
10214 const ARMSubtarget *Subtarget) {
10215 // Check for two extended operands.
10216 if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
10217 N1.getOpcode() == ISD::SIGN_EXTEND) &&
10218 !(N0.getOpcode() == ISD::ZERO_EXTEND &&
10219 N1.getOpcode() == ISD::ZERO_EXTEND))
10220 return SDValue();
10221
10222 SDValue N00 = N0.getOperand(0);
10223 SDValue N10 = N1.getOperand(0);
10224
10225 // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
10226 if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
10227 N00 == N10)
10228 return SDValue();
10229
10230 // We only recognize Q register paddl here; this can't be reached until
10231 // after type legalization.
10232 if (!N00.getValueType().is64BitVector() ||
10233 !N0.getValueType().is128BitVector())
10234 return SDValue();
10235
10236 // Generate vpaddl.
10237 SelectionDAG &DAG = DCI.DAG;
10238 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10239 SDLoc dl(N);
10240 EVT VT = N->getValueType(0);
10241
10242 SmallVector<SDValue, 8> Ops;
10243 // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
10244 unsigned Opcode;
10245 if (N0.getOpcode() == ISD::SIGN_EXTEND)
10246 Opcode = Intrinsic::arm_neon_vpaddls;
10247 else
10248 Opcode = Intrinsic::arm_neon_vpaddlu;
10249 Ops.push_back(DAG.getConstant(Opcode, dl,
10250 TLI.getPointerTy(DAG.getDataLayout())));
10251 EVT ElemTy = N00.getValueType().getVectorElementType();
10252 unsigned NumElts = VT.getVectorNumElements();
10253 EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
10254 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
10255 N00.getOperand(0), N00.getOperand(1));
10256 Ops.push_back(Concat);
10257
10258 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
10259}
10260
10261// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
10262// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
10263// much easier to match.
10264 static SDValue
10265 AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
10266 TargetLowering::DAGCombinerInfo &DCI,
10267 const ARMSubtarget *Subtarget) {
10268 // Only perform this optimization after legalization and if NEON is available.
10269 // We also expect both operands to be BUILD_VECTORs.
10270 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
10271 || N0.getOpcode() != ISD::BUILD_VECTOR
10272 || N1.getOpcode() != ISD::BUILD_VECTOR)
10273 return SDValue();
10274
10275 // Check output type since VPADDL operand elements can only be 8, 16, or 32.
10276 EVT VT = N->getValueType(0);
10277 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
10278 return SDValue();
10279
10280 // Check that the vector operands are of the right form.
10281 // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
10282 // operands, where N is the size of the formed vector.
10283 // Each EXTRACT_VECTOR should have the same input vector and odd or even
10284 // index such that we have a pairwise add pattern.
10285
10286 // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
10287 if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
10288 return SDValue();
10289 SDValue Vec = N0->getOperand(0)->getOperand(0);
10290 SDNode *V = Vec.getNode();
10291 unsigned nextIndex = 0;
10292
10293 // For each operands to the ADD which are BUILD_VECTORs,
10294 // check to see if each of their operands are an EXTRACT_VECTOR with
10295 // the same vector and appropriate index.
10296 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
10297 if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
10298 && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
10299
10300 SDValue ExtVec0 = N0->getOperand(i);
10301 SDValue ExtVec1 = N1->getOperand(i);
10302
10303 // First operand is the vector, verify its the same.
10304 if (V != ExtVec0->getOperand(0).getNode() ||
10305 V != ExtVec1->getOperand(0).getNode())
10306 return SDValue();
10307
10308 // Second is the constant, verify its correct.
10309 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
10310 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
10311
10312 // For the constant, we want to see all the even or all the odd.
10313 if (!C0 || !C1 || C0->getZExtValue() != nextIndex
10314 || C1->getZExtValue() != nextIndex+1)
10315 return SDValue();
10316
10317 // Increment index.
10318 nextIndex+=2;
10319 } else
10320 return SDValue();
10321 }
10322
10323 // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
10324 // we're using the entire input vector, otherwise there's a size/legality
10325 // mismatch somewhere.
10326 if (nextIndex != Vec.getValueType().getVectorNumElements() ||
10327 Vec.getValueType().getVectorElementType() == VT.getVectorElementType())
10328 return SDValue();
10329
10330 // Create VPADDL node.
10331 SelectionDAG &DAG = DCI.DAG;
10332 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10333
10334 SDLoc dl(N);
10335
10336 // Build operand list.
10337 SmallVector<SDValue, 8> Ops;
10338 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
10339 TLI.getPointerTy(DAG.getDataLayout())));
10340
10341 // Input is the vector.
10342 Ops.push_back(Vec);
10343
10344 // Get widened type and narrowed type.
10345 MVT widenType;
10346 unsigned numElem = VT.getVectorNumElements();
10347
10348 EVT inputLaneType = Vec.getValueType().getVectorElementType();
10349 switch (inputLaneType.getSimpleVT().SimpleTy) {
10350 case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
10351 case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
10352 case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
10353 default:
10354 llvm_unreachable("Invalid vector element type for padd optimization.");
10355 }
10356
10357 SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
10358 unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
10359 return DAG.getNode(ExtOp, dl, VT, tmp);
10360}
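// Illustrative sketch of the shape matched above, with V a placeholder
// v4i16 vector:
//   (add (build_vector (extract_elt V, 0), (extract_elt V, 2)),
//        (build_vector (extract_elt V, 1), (extract_elt V, 3)))
// sums adjacent lane pairs of V and so becomes a single vpaddl of V.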
10361
10362 static SDValue findMUL_LOHI(SDValue V) {
10363 if (V->getOpcode() == ISD::UMUL_LOHI ||
10364 V->getOpcode() == ISD::SMUL_LOHI)
10365 return V;
10366 return SDValue();
10367}
10368
10369 static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
10370 TargetLowering::DAGCombinerInfo &DCI,
10371 const ARMSubtarget *Subtarget) {
10372 if (Subtarget->isThumb()) {
10373 if (!Subtarget->hasDSP())
10374 return SDValue();
10375 } else if (!Subtarget->hasV5TEOps())
10376 return SDValue();
10377
10378 // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
10379 // accumulates the product into a 64-bit value. The 16-bit values will
10380 // be sign extended somehow or SRA'd into 32-bit values
10381 // (addc (adde (mul 16bit, 16bit), lo), hi)
10382 SDValue Mul = AddcNode->getOperand(0);
10383 SDValue Lo = AddcNode->getOperand(1);
10384 if (Mul.getOpcode() != ISD::MUL) {
10385 Lo = AddcNode->getOperand(0);
10386 Mul = AddcNode->getOperand(1);
10387 if (Mul.getOpcode() != ISD::MUL)
10388 return SDValue();
10389 }
10390
10391 SDValue SRA = AddeNode->getOperand(0);
10392 SDValue Hi = AddeNode->getOperand(1);
10393 if (SRA.getOpcode() != ISD::SRA) {
10394 SRA = AddeNode->getOperand(1);
10395 Hi = AddeNode->getOperand(0);
10396 if (SRA.getOpcode() != ISD::SRA)
10397 return SDValue();
10398 }
10399 if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
10400 if (Const->getZExtValue() != 31)
10401 return SDValue();
10402 } else
10403 return SDValue();
10404
10405 if (SRA.getOperand(0) != Mul)
10406 return SDValue();
10407
10408 SelectionDAG &DAG = DCI.DAG;
10409 SDLoc dl(AddcNode);
10410 unsigned Opcode = 0;
10411 SDValue Op0;
10412 SDValue Op1;
10413
10414 if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
10415 Opcode = ARMISD::SMLALBB;
10416 Op0 = Mul.getOperand(0);
10417 Op1 = Mul.getOperand(1);
10418 } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
10419 Opcode = ARMISD::SMLALBT;
10420 Op0 = Mul.getOperand(0);
10421 Op1 = Mul.getOperand(1).getOperand(0);
10422 } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
10423 Opcode = ARMISD::SMLALTB;
10424 Op0 = Mul.getOperand(0).getOperand(0);
10425 Op1 = Mul.getOperand(1);
10426 } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
10427 Opcode = ARMISD::SMLALTT;
10428 Op0 = Mul->getOperand(0).getOperand(0);
10429 Op1 = Mul->getOperand(1).getOperand(0);
10430 }
10431
10432 if (!Op0 || !Op1)
10433 return SDValue();
10434
10435 SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
10436 Op0, Op1, Lo, Hi);
10437 // Replace the ADD nodes' uses with the MLAL node's values.
10438 SDValue HiMLALResult(SMLAL.getNode(), 1);
10439 SDValue LoMLALResult(SMLAL.getNode(), 0);
10440
10441 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
10442 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
10443
10444 // Return original node to notify the driver to stop replacing.
10445 SDValue resNode(AddcNode, 0);
10446 return resNode;
10447}
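// Worked example (illustrative; a, b, lo, hi are placeholder i32 values,
// with a and b holding sign-extended 16-bit quantities):
//   (adde (sra (mul a, b), 31), hi) glued to (addc (mul a, b), lo)
// is replaced by (SMLALBB a, b, lo, hi), accumulating the 32-bit product
// of the two 16-bit values into the 64-bit pair hi:lo.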
10448
10449 static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode,
10450 TargetLowering::DAGCombinerInfo &DCI,
10451 const ARMSubtarget *Subtarget) {
10452 // Look for multiply add opportunities.
10453 // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
10454 // each add node consumes a value from ISD::UMUL_LOHI and there is
10455 // a glue link from the first add to the second add.
10456 // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
10457 // a S/UMLAL instruction.
10458 // UMUL_LOHI
10459 // / :lo \ :hi
10460 // V \ [no multiline comment]
10461 // loAdd -> ADDC |
10462 // \ :carry /
10463 // V V
10464 // ADDE <- hiAdd
10465 //
10466 // In the special case where only the higher part of a signed result is used
10467 // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
10468 // a constant with the exact value of 0x80000000, we recognize we are dealing
10469 // with a "rounded multiply and add" (or subtract) and transform it into
10470 // either an ARMISD::SMMLAR or an ARMISD::SMMLSR, respectively.
10471
10472 assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
10473 AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
10474 "Expect an ADDE or SUBE");
10475
10476 assert(AddeSubeNode->getNumOperands() == 3 &&
10477 AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
10478 "ADDE node has the wrong inputs");
10479
10480 // Check that we are chained to the right ADDC or SUBC node.
10481 SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
10482 if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
10483 AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
10484 (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
10485 AddcSubcNode->getOpcode() != ARMISD::SUBC))
10486 return SDValue();
10487
10488 SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
10489 SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
10490
10491 // Check if the two operands are from the same mul_lohi node.
10492 if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
10493 return SDValue();
10494
10495 assert(AddcSubcNode->getNumValues() == 2 &&
10496 AddcSubcNode->getValueType(0) == MVT::i32 &&
10497 "Expect ADDC with two result values. First: i32");
10498
10499 // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
10500 // may be an SMLAL which multiplies two 16-bit values.
10501 if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
10502 AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
10503 AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
10504 AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
10505 AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
10506 return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
10507
10508 // Check for the triangle shape.
10509 SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
10510 SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
10511
10512 // Make sure that the ADDE/SUBE operands are not coming from the same node.
10513 if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
10514 return SDValue();
10515
10516 // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
10517 bool IsLeftOperandMUL = false;
10518 SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
10519 if (MULOp == SDValue())
10520 MULOp = findMUL_LOHI(AddeSubeOp1);
10521 else
10522 IsLeftOperandMUL = true;
10523 if (MULOp == SDValue())
10524 return SDValue();
10525
10526 // Figure out the right opcode.
10527 unsigned Opc = MULOp->getOpcode();
10528 unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
10529
10530 // Figure out the high and low input values to the MLAL node.
10531 SDValue *HiAddSub = nullptr;
10532 SDValue *LoMul = nullptr;
10533 SDValue *LowAddSub = nullptr;
10534
10535 // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
10536 if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
10537 return SDValue();
10538
10539 if (IsLeftOperandMUL)
10540 HiAddSub = &AddeSubeOp1;
10541 else
10542 HiAddSub = &AddeSubeOp0;
10543
10544 // Ensure that LoMul and LowAddSub are taken from the correct ISD::SMUL_LOHI node
10545 // whose low result is fed to the ADDC/SUBC we are checking.
10546
10547 if (AddcSubcOp0 == MULOp.getValue(0)) {
10548 LoMul = &AddcSubcOp0;
10549 LowAddSub = &AddcSubcOp1;
10550 }
10551 if (AddcSubcOp1 == MULOp.getValue(0)) {
10552 LoMul = &AddcSubcOp1;
10553 LowAddSub = &AddcSubcOp0;
10554 }
10555
10556 if (!LoMul)
10557 return SDValue();
10558
10559 // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC
10560 // the replacement below will create a cycle.
10561 if (AddcSubcNode == HiAddSub->getNode() ||
10562 AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
10563 return SDValue();
10564
10565 // Create the merged node.
10566 SelectionDAG &DAG = DCI.DAG;
10567
10568 // Start building operand list.
10569 SmallVector<SDValue, 8> Ops;
10570 Ops.push_back(LoMul->getOperand(0));
10571 Ops.push_back(LoMul->getOperand(1));
10572
10573 // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
10574 // the case, we must be doing signed multiplication and only use the higher
10575 // part of the result of the MLAL; furthermore the LowAddSub must be a constant
10576 // addition or subtraction with the exact value of 0x80000000.
10577 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
10578 FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
10579 LowAddSub->getNode()->getOpcode() == ISD::Constant &&
10580 static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
10581 0x80000000) {
10582 Ops.push_back(*HiAddSub);
10583 if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
10584 FinalOpc = ARMISD::SMMLSR;
10585 } else {
10586 FinalOpc = ARMISD::SMMLAR;
10587 }
10588 SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
10589 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
10590
10591 return SDValue(AddeSubeNode, 0);
10592 } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
10593 // SMMLS is generated during instruction selection and the rest of this
10594 // function can not handle the case where AddcSubcNode is a SUBC.
10595 return SDValue();
10596
10597 // Finish building the operand list for {U/S}MLAL
10598 Ops.push_back(*LowAddSub);
10599 Ops.push_back(*HiAddSub);
10600
10601 SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
10602 DAG.getVTList(MVT::i32, MVT::i32), Ops);
10603
10604 // Replace the ADD nodes' uses with the MLAL node's values.
10605 SDValue HiMLALResult(MLALNode.getNode(), 1);
10606 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
10607
10608 SDValue LoMLALResult(MLALNode.getNode(), 0);
10609 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
10610
10611 // Return original node to notify the driver to stop replacing.
10612 return SDValue(AddeSubeNode, 0);
10613}
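// Worked example (illustrative; a, b, x, y are placeholder i32 values):
//   loadd = (ARMISD::ADDC (smul_lohi a, b):lo, x)
//   hiadd = (ARMISD::ADDE (smul_lohi a, b):hi, y, loadd:carry)
// is merged into a single (ARMISD::SMLAL a, b, x, y) that produces both
// halves of the 64-bit result a * b + y:x.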
10614
10615 static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
10616 TargetLowering::DAGCombinerInfo &DCI,
10617 const ARMSubtarget *Subtarget) {
10618 // UMAAL is similar to UMLAL except that it adds two unsigned values.
10619 // While trying to combine for the other MLAL nodes, first search for the
10620 // chance to use UMAAL. Check if Addc uses a node which has already
10621 // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
10622 // as the addend, and it's handled in PerformUMLALCombine.
10623
10624 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
10625 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
10626
10627 // Check that we have a glued ADDC node.
10628 SDNode* AddcNode = AddeNode->getOperand(2).getNode();
10629 if (AddcNode->getOpcode() != ARMISD::ADDC)
10630 return SDValue();
10631
10632 // Find the converted UMAAL or quit if it doesn't exist.
10633 SDNode *UmlalNode = nullptr;
10634 SDValue AddHi;
10635 if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
10636 UmlalNode = AddcNode->getOperand(0).getNode();
10637 AddHi = AddcNode->getOperand(1);
10638 } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
10639 UmlalNode = AddcNode->getOperand(1).getNode();
10640 AddHi = AddcNode->getOperand(0);
10641 } else {
10642 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
10643 }
10644
10645 // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
10646 // the ADDC, as well as a zero addend.
10647 if (!isNullConstant(UmlalNode->getOperand(3)))
10648 return SDValue();
10649
10650 if ((isNullConstant(AddeNode->getOperand(0)) &&
10651 AddeNode->getOperand(1).getNode() == UmlalNode) ||
10652 (AddeNode->getOperand(0).getNode() == UmlalNode &&
10653 isNullConstant(AddeNode->getOperand(1)))) {
10654 SelectionDAG &DAG = DCI.DAG;
10655 SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
10656 UmlalNode->getOperand(2), AddHi };
10657 SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
10658 DAG.getVTList(MVT::i32, MVT::i32), Ops);
10659
10660 // Replace the ADD nodes' uses with the UMAAL node's values.
10661 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
10662 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
10663
10664 // Return original node to notify the driver to stop replacing.
10665 return SDValue(AddeNode, 0);
10666 }
10667 return SDValue();
10668}
10669
10670 static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
10671 const ARMSubtarget *Subtarget) {
10672 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
10673 return SDValue();
10674
10675 // Check that we have a pair of ADDC and ADDE as operands.
10676 // Both addends of the ADDE must be zero.
10677 SDNode* AddcNode = N->getOperand(2).getNode();
10678 SDNode* AddeNode = N->getOperand(3).getNode();
10679 if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
10680 (AddeNode->getOpcode() == ARMISD::ADDE) &&
10681 isNullConstant(AddeNode->getOperand(0)) &&
10682 isNullConstant(AddeNode->getOperand(1)) &&
10683 (AddeNode->getOperand(2).getNode() == AddcNode))
10684 return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
10685 DAG.getVTList(MVT::i32, MVT::i32),
10686 {N->getOperand(0), N->getOperand(1),
10687 AddcNode->getOperand(0), AddcNode->getOperand(1)});
10688 else
10689 return SDValue();
10690}
10691
10692 static SDValue PerformAddcSubcCombine(SDNode *N,
10693 TargetLowering::DAGCombinerInfo &DCI,
10694 const ARMSubtarget *Subtarget) {
10695 SelectionDAG &DAG(DCI.DAG);
10696
10697 if (N->getOpcode() == ARMISD::SUBC) {
10698 // (SUBC (ADDE 0, 0, C), 1) -> C
10699 SDValue LHS = N->getOperand(0);
10700 SDValue RHS = N->getOperand(1);
10701 if (LHS->getOpcode() == ARMISD::ADDE &&
10702 isNullConstant(LHS->getOperand(0)) &&
10703 isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
10704 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
10705 }
10706 }
10707
10708 if (Subtarget->isThumb1Only()) {
10709 SDValue RHS = N->getOperand(1);
10710 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
10711 int32_t imm = C->getSExtValue();
10712 if (imm < 0 && imm > std::numeric_limits<int>::min()) {
10713 SDLoc DL(N);
10714 RHS = DAG.getConstant(-imm, DL, MVT::i32);
10715 unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
10716 : ARMISD::ADDC;
10717 return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
10718 }
10719 }
10720 }
10721
10722 return SDValue();
10723}
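// e.g. on Thumb1, (ARMISD::ADDC x, -4) is rewritten as (ARMISD::SUBC x, 4);
// the positive immediate is cheaper to materialize than the negative one.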
10724
10725 static SDValue PerformAddeSubeCombine(SDNode *N,
10726 TargetLowering::DAGCombinerInfo &DCI,
10727 const ARMSubtarget *Subtarget) {
10728 if (Subtarget->isThumb1Only()) {
10729 SelectionDAG &DAG = DCI.DAG;
10730 SDValue RHS = N->getOperand(1);
10731 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
10732 int64_t imm = C->getSExtValue();
10733 if (imm < 0) {
10734 SDLoc DL(N);
10735
10736 // The with-carry-in form matches bitwise not instead of the negation.
10737 // Effectively, the inverse interpretation of the carry flag already
10738 // accounts for part of the negation.
10739 RHS = DAG.getConstant(~imm, DL, MVT::i32);
10740
10741 unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
10742 : ARMISD::ADDE;
10743 return DAG.getNode(Opcode, DL, N->getVTList(),
10744 N->getOperand(0), RHS, N->getOperand(2));
10745 }
10746 }
10747 } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
10748 return AddCombineTo64bitMLAL(N, DCI, Subtarget);
10749 }
10750 return SDValue();
10751}
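// Worked example (illustrative): on Thumb1, (ARMISD::ADDE x, -5, flags)
// becomes (ARMISD::SUBE x, 4, flags), since ~(-5) == 4 and
// x + (-5) + carry == x - 4 - (1 - carry).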
10752
10753 static SDValue PerformABSCombine(SDNode *N,
10754 TargetLowering::DAGCombinerInfo &DCI,
10755 const ARMSubtarget *Subtarget) {
10756 SDValue res;
10757 SelectionDAG &DAG = DCI.DAG;
10758 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10759
10760 if (TLI.isOperationLegal(N->getOpcode(), N->getValueType(0)))
10761 return SDValue();
10762
10763 if (!TLI.expandABS(N, res, DAG))
10764 return SDValue();
10765
10766 return res;
10767}
10768
10769/// PerformADDECombine - Target-specific dag combine transform from
10770/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
10771 /// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
10772 static SDValue PerformADDECombine(SDNode *N,
10773 TargetLowering::DAGCombinerInfo &DCI,
10774 const ARMSubtarget *Subtarget) {
10775 // Only ARM and Thumb2 support UMLAL/SMLAL.
10776 if (Subtarget->isThumb1Only())
10777 return PerformAddeSubeCombine(N, DCI, Subtarget);
10778
10779 // Only perform the checks after legalize when the pattern is available.
10780 if (DCI.isBeforeLegalize()) return SDValue();
10781
10782 return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
10783}
10784
10785/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
10786/// operands N0 and N1. This is a helper for PerformADDCombine that is
10787/// called with the default operands, and if that fails, with commuted
10788 /// operands.
10789 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
10790 TargetLowering::DAGCombinerInfo &DCI,
10791 const ARMSubtarget *Subtarget){
10792 // Attempt to create vpadd for this add.
10793 if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
10794 return Result;
10795
10796 // Attempt to create vpaddl for this add.
10797 if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
10798 return Result;
10799 if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
10800 Subtarget))
10801 return Result;
10802
10803 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
10804 if (N0.getNode()->hasOneUse())
10805 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
10806 return Result;
10807 return SDValue();
10808}
10809
10810 bool
10811 ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
10812 CombineLevel Level) const {
10813 if (Level == BeforeLegalizeTypes)
10814 return true;
10815
10816 if (N->getOpcode() != ISD::SHL)
10817 return true;
10818
10819 if (Subtarget->isThumb1Only()) {
10820 // Avoid making expensive immediates by commuting shifts. (This logic
10821 // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
10822 // for free.)
10823 if (N->getOpcode() != ISD::SHL)
10824 return true;
10825 SDValue N1 = N->getOperand(0);
10826 if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
10827 N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
10828 return true;
10829 if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
10830 if (Const->getAPIntValue().ult(256))
10831 return false;
10832 if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
10833 Const->getAPIntValue().sgt(-256))
10834 return false;
10835 }
10836 return true;
10837 }
10838
10839 // Turn off commute-with-shift transform after legalization, so it doesn't
10840 // conflict with PerformSHLSimplify. (We could try to detect when
10841 // PerformSHLSimplify would trigger more precisely, but it isn't
10842 // really necessary.)
10843 return false;
10844}
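// e.g. on Thumb1, (shl (add x, 200), 1) is left alone because 200 fits an
// 8-bit immediate, whereas (shl (add x, 1024), 1) may be commuted to
// (add (shl x, 1), 2048), since 1024 already needs a mov immediate anyway.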
10845
10846 bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
10847 const SDNode *N, CombineLevel Level) const {
10848 if (!Subtarget->isThumb1Only())
10849 return true;
10850
10851 if (Level == BeforeLegalizeTypes)
10852 return true;
10853
10854 return false;
10855}
10856
10857 bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
10858 if (!Subtarget->hasNEON()) {
10859 if (Subtarget->isThumb1Only())
10860 return VT.getScalarSizeInBits() <= 32;
10861 return true;
10862 }
10863 return VT.isScalarInteger();
10864}
10865
10866 static SDValue PerformSHLSimplify(SDNode *N,
10867 TargetLowering::DAGCombinerInfo &DCI,
10868 const ARMSubtarget *ST) {
10869 // Allow the generic combiner to identify potential bswaps.
10870 if (DCI.isBeforeLegalize())
10871 return SDValue();
10872
10873 // DAG combiner will fold:
10874 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10875 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10876 // Other code patterns that can also be modified have the following form:
10877 // b + ((a << 1) | 510)
10878 // b + ((a << 1) & 510)
10879 // b + ((a << 1) ^ 510)
10880 // b + ((a << 1) + 510)
10881
10882 // Many instructions can perform the shift for free, but this requires both
10883 // operands to be registers. If c1 << c2 is too large, a mov immediate
10884 // instruction will be needed. So, unfold back to the original pattern if:
10885 // - c1 and c2 are small enough that they don't require mov imms.
10886 // - the user(s) of the node can perform a shl.
10887
10888 // No shifted operands for 16-bit instructions.
10889 if (ST->isThumb() && ST->isThumb1Only())
10890 return SDValue();
10891
10892 // Check that all the users could perform the shl themselves.
10893 for (auto U : N->uses()) {
10894 switch(U->getOpcode()) {
10895 default:
10896 return SDValue();
10897 case ISD::SUB:
10898 case ISD::ADD:
10899 case ISD::AND:
10900 case ISD::OR:
10901 case ISD::XOR:
10902 case ISD::SETCC:
10903 case ARMISD::CMP:
10904 // Check that the user isn't already using a constant because there
10905 // aren't any instructions that support an immediate operand and a
10906 // shifted operand.
10907 if (isa<ConstantSDNode>(U->getOperand(0)) ||
10908 isa<ConstantSDNode>(U->getOperand(1)))
10909 return SDValue();
10910
10911 // Check that it's not already using a shift.
10912 if (U->getOperand(0).getOpcode() == ISD::SHL ||
10913 U->getOperand(1).getOpcode() == ISD::SHL)
10914 return SDValue();
10915 break;
10916 }
10917 }
10918
10919 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
10920 N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
10921 return SDValue();
10922
10923 if (N->getOperand(0).getOpcode() != ISD::SHL)
10924 return SDValue();
10925
10926 SDValue SHL = N->getOperand(0);
10927
10928 auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
10929 auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
10930 if (!C1ShlC2 || !C2)
10931 return SDValue();
10932
10933 APInt C2Int = C2->getAPIntValue();
10934 APInt C1Int = C1ShlC2->getAPIntValue();
10935
10936 // Check that performing a lshr will not lose any information.
10937 APInt Mask = APInt::getHighBitsSet(C2Int.getBitWidth(),
10938 C2Int.getBitWidth() - C2->getZExtValue());
10939 if ((C1Int & Mask) != C1Int)
10940 return SDValue();
10941
10942 // Shift the first constant.
10943 C1Int.lshrInPlace(C2Int);
10944
10945 // The immediates are encoded as an 8-bit value that can be rotated.
10946 auto LargeImm = [](const APInt &Imm) {
10947 unsigned Zeros = Imm.countLeadingZeros() + Imm.countTrailingZeros();
10948 return Imm.getBitWidth() - Zeros > 8;
10949 };
10950
10951 if (LargeImm(C1Int) || LargeImm(C2Int))
10952 return SDValue();
10953
10954 SelectionDAG &DAG = DCI.DAG;
10955 SDLoc dl(N);
10956 SDValue X = SHL.getOperand(0);
10957 SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
10958 DAG.getConstant(C1Int, dl, MVT::i32));
10959 // Shift left to compensate for the lshr of C1Int.
10960 SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
10961
10962 LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
10963 SHL.dump(); N->dump());
10964 LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
10965 return Res;
10966}
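// Worked example (illustrative): N = (add (shl x, 1), 510) has c1 << c2 = 510
// and c2 = 1. Shifting right recovers c1 = 255; both 255 and 1 encode as
// rotated 8-bit immediates, so N unfolds to (shl (add x, 255), 1) and the
// user of N can then fold the outer shl into its shifted operand.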
10967
10968
10969/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
10970 ///
10971 static SDValue PerformADDCombine(SDNode *N,
10972 TargetLowering::DAGCombinerInfo &DCI,
10973 const ARMSubtarget *Subtarget) {
10974 SDValue N0 = N->getOperand(0);
10975 SDValue N1 = N->getOperand(1);
10976
10977 // Only works one way, because it needs an immediate operand.
10978 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
10979 return Result;
10980
10981 // First try with the default operand order.
10982 if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
10983 return Result;
10984
10985 // If that didn't work, try again with the operands commuted.
10986 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
10987}
10988
10989/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
10990 ///
10991 static SDValue PerformSUBCombine(SDNode *N,
10992 TargetLowering::DAGCombinerInfo &DCI) {
10993 SDValue N0 = N->getOperand(0);
10994 SDValue N1 = N->getOperand(1);
10995
10996 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
10997 if (N1.getNode()->hasOneUse())
10998 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
10999 return Result;
11000
11001 return SDValue();
11002}
11003
11004/// PerformVMULCombine
11005/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
11006/// special multiplier accumulator forwarding.
11007/// vmul d3, d0, d2
11008/// vmla d3, d1, d2
11009/// is faster than
11010/// vadd d3, d0, d1
11011/// vmul d3, d3, d2
11012// However, for (A + B) * (A + B),
11013// vadd d2, d0, d1
11014// vmul d3, d0, d2
11015// vmla d3, d1, d2
11016// is slower than
11017// vadd d2, d0, d1
11018 // vmul d3, d2, d2
11019 static SDValue PerformVMULCombine(SDNode *N,
11020 TargetLowering::DAGCombinerInfo &DCI,
11021 const ARMSubtarget *Subtarget) {
11022 if (!Subtarget->hasVMLxForwarding())
11023 return SDValue();
11024
11025 SelectionDAG &DAG = DCI.DAG;
11026 SDValue N0 = N->getOperand(0);
11027 SDValue N1 = N->getOperand(1);
11028 unsigned Opcode = N0.getOpcode();
11029 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
11030 Opcode != ISD::FADD && Opcode != ISD::FSUB) {
11031 Opcode = N1.getOpcode();
11032 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
11033 Opcode != ISD::FADD && Opcode != ISD::FSUB)
11034 return SDValue();
11035 std::swap(N0, N1);
11036 }
11037
11038 if (N0 == N1)
11039 return SDValue();
11040
11041 EVT VT = N->getValueType(0);
11042 SDLoc DL(N);
11043 SDValue N00 = N0->getOperand(0);
11044 SDValue N01 = N0->getOperand(1);
11045 return DAG.getNode(Opcode, DL, VT,
11046 DAG.getNode(ISD::MUL, DL, VT, N00, N1),
11047 DAG.getNode(ISD::MUL, DL, VT, N01, N1));
11048}
11049
11050 static SDValue PerformMULCombine(SDNode *N,
11051 TargetLowering::DAGCombinerInfo &DCI,
11052 const ARMSubtarget *Subtarget) {
11053 SelectionDAG &DAG = DCI.DAG;
11054
11055 if (Subtarget->isThumb1Only())
11056 return SDValue();
11057
11058 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
11059 return SDValue();
11060
11061 EVT VT = N->getValueType(0);
11062 if (VT.is64BitVector() || VT.is128BitVector())
11063 return PerformVMULCombine(N, DCI, Subtarget);
11064 if (VT != MVT::i32)
11065 return SDValue();
11066
11067 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
11068 if (!C)
11069 return SDValue();
11070
11071 int64_t MulAmt = C->getSExtValue();
11072 unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt);
11073
11074 ShiftAmt = ShiftAmt & (32 - 1);
11075 SDValue V = N->getOperand(0);
11076 SDLoc DL(N);
11077
11078 SDValue Res;
11079 MulAmt >>= ShiftAmt;
11080
11081 if (MulAmt >= 0) {
11082 if (isPowerOf2_32(MulAmt - 1)) {
11083 // (mul x, 2^N + 1) => (add (shl x, N), x)
11084 Res = DAG.getNode(ISD::ADD, DL, VT,
11085 V,
11086 DAG.getNode(ISD::SHL, DL, VT,
11087 V,
11088 DAG.getConstant(Log2_32(MulAmt - 1), DL,
11089 MVT::i32)));
11090 } else if (isPowerOf2_32(MulAmt + 1)) {
11091 // (mul x, 2^N - 1) => (sub (shl x, N), x)
11092 Res = DAG.getNode(ISD::SUB, DL, VT,
11093 DAG.getNode(ISD::SHL, DL, VT,
11094 V,
11095 DAG.getConstant(Log2_32(MulAmt + 1), DL,
11096 MVT::i32)),
11097 V);
11098 } else
11099 return SDValue();
11100 } else {
11101 uint64_t MulAmtAbs = -MulAmt;
11102 if (isPowerOf2_32(MulAmtAbs + 1)) {
11103 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
11104 Res = DAG.getNode(ISD::SUB, DL, VT,
11105 V,
11106 DAG.getNode(ISD::SHL, DL, VT,
11107 V,
11108 DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
11109 MVT::i32)));
11110 } else if (isPowerOf2_32(MulAmtAbs - 1)) {
11111 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
11112 Res = DAG.getNode(ISD::ADD, DL, VT,
11113 V,
11114 DAG.getNode(ISD::SHL, DL, VT,
11115 V,
11116 DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
11117 MVT::i32)));
11118 Res = DAG.getNode(ISD::SUB, DL, VT,
11119 DAG.getConstant(0, DL, MVT::i32), Res);
11120 } else
11121 return SDValue();
11122 }
11123
11124 if (ShiftAmt != 0)
11125 Res = DAG.getNode(ISD::SHL, DL, VT,
11126 Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
11127
11128 // Do not add new nodes to DAG combiner worklist.
11129 DCI.CombineTo(N, Res, false);
11130 return SDValue();
11131}
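// Worked example (illustrative): (mul x, 10) has one trailing zero, so
// ShiftAmt = 1 and MulAmt becomes 5 = 2^2 + 1. The combine first forms
// (add (shl x, 2), x) and then applies the deferred shift:
//   (mul x, 10) => (shl (add (shl x, 2), x), 1)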
11132
11133 static SDValue CombineANDShift(SDNode *N,
11134 TargetLowering::DAGCombinerInfo &DCI,
11135 const ARMSubtarget *Subtarget) {
11136 // Allow DAGCombine to pattern-match before we touch the canonical form.
11137 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
11138 return SDValue();
11139
11140 if (N->getValueType(0) != MVT::i32)
11141 return SDValue();
11142
11143 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
11144 if (!N1C)
11145 return SDValue();
11146
11147 uint32_t C1 = (uint32_t)N1C->getZExtValue();
11148 // Don't transform uxtb/uxth.
11149 if (C1 == 255 || C1 == 65535)
11150 return SDValue();
11151
11152 SDNode *N0 = N->getOperand(0).getNode();
11153 if (!N0->hasOneUse())
11154 return SDValue();
11155
11156 if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
11157 return SDValue();
11158
11159 bool LeftShift = N0->getOpcode() == ISD::SHL;
11160
11161 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
11162 if (!N01C)
11163 return SDValue();
11164
11165 uint32_t C2 = (uint32_t)N01C->getZExtValue();
11166 if (!C2 || C2 >= 32)
11167 return SDValue();
11168
11169 // Clear irrelevant bits in the mask.
11170 if (LeftShift)
11171 C1 &= (-1U << C2);
11172 else
11173 C1 &= (-1U >> C2);
11174
11175 SelectionDAG &DAG = DCI.DAG;
11176 SDLoc DL(N);
11177
11178 // We have a pattern of the form "(and (shl x, c2) c1)" or
11179 // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
11180 // transform to a pair of shifts, to save materializing c1.
11181
11182 // First pattern: right shift, then mask off leading bits.
11183 // FIXME: Use demanded bits?
11184 if (!LeftShift && isMask_32(C1)) {
11185 uint32_t C3 = countLeadingZeros(C1);
11186 if (C2 < C3) {
11187 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
11188 DAG.getConstant(C3 - C2, DL, MVT::i32));
11189 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
11190 DAG.getConstant(C3, DL, MVT::i32));
11191 }
11192 }
11193
11194 // First pattern, reversed: left shift, then mask off trailing bits.
11195 if (LeftShift && isMask_32(~C1)) {
11196 uint32_t C3 = countTrailingZeros(C1);
11197 if (C2 < C3) {
11198 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
11199 DAG.getConstant(C3 - C2, DL, MVT::i32));
11200 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
11201 DAG.getConstant(C3, DL, MVT::i32));
11202 }
11203 }
11204
11205 // Second pattern: left shift, then mask off leading bits.
11206 // FIXME: Use demanded bits?
11207 if (LeftShift && isShiftedMask_32(C1)) {
11208 uint32_t Trailing = countTrailingZeros(C1);
11209 uint32_t C3 = countLeadingZeros(C1);
11210 if (Trailing == C2 && C2 + C3 < 32) {
11211 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
11212 DAG.getConstant(C2 + C3, DL, MVT::i32));
11213 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
11214 DAG.getConstant(C3, DL, MVT::i32));
11215 }
11216 }
11217
11218 // Second pattern, reversed: right shift, then mask off trailing bits.
11219 // FIXME: Handle other patterns of known/demanded bits.
11220 if (!LeftShift && isShiftedMask_32(C1)) {
11221 uint32_t Leading = countLeadingZeros(C1);
11222 uint32_t C3 = countTrailingZeros(C1);
11223 if (Leading == C2 && C2 + C3 < 32) {
11224 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
11225 DAG.getConstant(C2 + C3, DL, MVT::i32));
11226 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
11227 DAG.getConstant(C3, DL, MVT::i32));
11228 }
11229 }
11230
11231 // FIXME: Transform "(and (shl x, c2) c1)" ->
11232 // "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
11233 // c1.
11234 return SDValue();
11235}
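// Worked example (illustrative): (and (srl x, 2), 0x3ff) matches the first
// pattern with C1 = 0x3ff and C2 = 2, so C3 = 22 and the node is rewritten
// as (srl (shl x, 20), 22), avoiding a separate constant for the mask.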
11236
11237 static SDValue PerformANDCombine(SDNode *N,
11238 TargetLowering::DAGCombinerInfo &DCI,
11239 const ARMSubtarget *Subtarget) {
11240 // Attempt to use immediate-form VBIC
11241 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
11242 SDLoc dl(N);
11243 EVT VT = N->getValueType(0);
11244 SelectionDAG &DAG = DCI.DAG;
11245
11246 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
11247 return SDValue();
11248
11249 APInt SplatBits, SplatUndef;
11250 unsigned SplatBitSize;
11251 bool HasAnyUndefs;
11252 if (BVN && Subtarget->hasNEON() &&
11253 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
11254 if (SplatBitSize <= 64) {
11255 EVT VbicVT;
11256 SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
11257 SplatUndef.getZExtValue(), SplatBitSize,
11258 DAG, dl, VbicVT, VT.is128BitVector(),
11259 OtherModImm);
11260 if (Val.getNode()) {
11261 SDValue Input =
11262 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
11263 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
11264 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
11265 }
11266 }
11267 }
11268
11269 if (!Subtarget->isThumb1Only()) {
11270 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
11271 if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
11272 return Result;
11273
11274 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
11275 return Result;
11276 }
11277
11278 if (Subtarget->isThumb1Only())
11279 if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
11280 return Result;
11281
11282 return SDValue();
11283}
11284
11285 // Try combining OR nodes to SMULWB, SMULWT.
11286 static SDValue PerformORCombineToSMULWBT(SDNode *OR,
11287 TargetLowering::DAGCombinerInfo &DCI,
11288 const ARMSubtarget *Subtarget) {
11289 if (!Subtarget->hasV6Ops() ||
11290 (Subtarget->isThumb() &&
11291 (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
11292 return SDValue();
11293
11294 SDValue SRL = OR->getOperand(0);
11295 SDValue SHL = OR->getOperand(1);
11296
11297 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
11298 SRL = OR->getOperand(1);
11299 SHL = OR->getOperand(0);
11300 }
11301 if (!isSRL16(SRL) || !isSHL16(SHL))
11302 return SDValue();
11303
11304 // The first operands to the shifts need to be the two results from the
11305 // same smul_lohi node.
11306 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
11307 SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
11308 return SDValue();
11309
11310 SDNode *SMULLOHI = SRL.getOperand(0).getNode();
11311 if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
11312 SHL.getOperand(0) != SDValue(SMULLOHI, 1))
11313 return SDValue();
11314
11315 // Now we have:
11316 // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
11317 // For SMUL[B|T] smul_lohi will take a 32-bit and a 16-bit argument.
11318 // For SMULWB the 16-bit value will be sign extended somehow.
11319 // For SMULWT only the SRA is required.
11320 // Check both sides of SMUL_LOHI
11321 SDValue OpS16 = SMULLOHI->getOperand(0);
11322 SDValue OpS32 = SMULLOHI->getOperand(1);
11323
11324 SelectionDAG &DAG = DCI.DAG;
11325 if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
11326 OpS16 = OpS32;
11327 OpS32 = SMULLOHI->getOperand(0);
11328 }
11329
11330 SDLoc dl(OR);
11331 unsigned Opcode = 0;
11332 if (isS16(OpS16, DAG))
11333 Opcode = ARMISD::SMULWB;
11334 else if (isSRA16(OpS16)) {
11335 Opcode = ARMISD::SMULWT;
11336 OpS16 = OpS16->getOperand(0);
11337 }
11338 else
11339 return SDValue();
11340
11341 SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
11342 DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
11343 return SDValue(OR, 0);
11344}
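// Worked example (illustrative; a and b are placeholders, with b a
// sign-extended 16-bit value): with M = (smul_lohi a, b), the node
//   (or (srl M:lo, 16), (shl M:hi, 16))
// reassembles bits [47:16] of the 64-bit product, which is exactly
// (ARMISD::SMULWB a, b).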
11345
11346 static SDValue PerformORCombineToBFI(SDNode *N,
11347 TargetLowering::DAGCombinerInfo &DCI,
11348 const ARMSubtarget *Subtarget) {
11349 // BFI is only available on V6T2+
11350 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
11351 return SDValue();
11352
11353 EVT VT = N->getValueType(0);
11354 SDValue N0 = N->getOperand(0);
11355 SDValue N1 = N->getOperand(1);
11356 SelectionDAG &DAG = DCI.DAG;
11357 SDLoc DL(N);
11358 // 1) or (and A, mask), val => ARMbfi A, val, mask
11359 // iff (val & mask) == val
11360 //
11361 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
11362 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
11363 // && mask == ~mask2
11364 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
11365 // && ~mask == mask2
11366 // (i.e., copy a bitfield value into another bitfield of the same width)
11367
11368 if (VT != MVT::i32)
11369 return SDValue();
11370
11371 SDValue N00 = N0.getOperand(0);
11372
11373 // The value and the mask need to be constants so we can verify this is
11374 // actually a bitfield set. If the mask is 0xffff, we can do better
11375 // via a movt instruction, so don't use BFI in that case.
11376 SDValue MaskOp = N0.getOperand(1);
11377 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
11378 if (!MaskC)
11379 return SDValue();
11380 unsigned Mask = MaskC->getZExtValue();
11381 if (Mask == 0xffff)
11382 return SDValue();
11383 SDValue Res;
11384 // Case (1): or (and A, mask), val => ARMbfi A, val, mask
11385 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
11386 if (N1C) {
11387 unsigned Val = N1C->getZExtValue();
11388 if ((Val & ~Mask) != Val)
11389 return SDValue();
11390
11391 if (ARM::isBitFieldInvertedMask(Mask)) {
11392 Val >>= countTrailingZeros(~Mask);
11393
11394 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
11395 DAG.getConstant(Val, DL, MVT::i32),
11396 DAG.getConstant(Mask, DL, MVT::i32));
11397
11398 DCI.CombineTo(N, Res, false);
11399 // Return value from the original node to inform the combiner that N is
11400 // now dead.
11401 return SDValue(N, 0);
11402 }
11403 } else if (N1.getOpcode() == ISD::AND) {
11404 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
11405 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
11406 if (!N11C)
11407 return SDValue();
11408 unsigned Mask2 = N11C->getZExtValue();
11409
11410 // Mask and ~Mask2 (or the reverse) must be equivalent for the BFI
11411 // pattern to match as-is.
11412 if (ARM::isBitFieldInvertedMask(Mask) &&
11413 (Mask == ~Mask2)) {
11414 // The pack halfword instruction works better for masks that fit it,
11415 // so use that when it's available.
11416 if (Subtarget->hasDSP() &&
11417 (Mask == 0xffff || Mask == 0xffff0000))
11418 return SDValue();
11419 // 2a
11420 unsigned amt = countTrailingZeros(Mask2);
11421 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
11422 DAG.getConstant(amt, DL, MVT::i32));
11423 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
11424 DAG.getConstant(Mask, DL, MVT::i32));
11425 DCI.CombineTo(N, Res, false);
11426 // Return value from the original node to inform the combiner that N is
11427 // now dead.
11428 return SDValue(N, 0);
11429 } else if (ARM::isBitFieldInvertedMask(~Mask) &&
11430 (~Mask == Mask2)) {
11431 // The pack halfword instruction works better for masks that fit it,
11432 // so use that when it's available.
11433 if (Subtarget->hasDSP() &&
11434 (Mask2 == 0xffff || Mask2 == 0xffff0000))
11435 return SDValue();
11436 // 2b
11437 unsigned lsb = countTrailingZeros(Mask);
11438 Res = DAG.getNode(ISD::SRL, DL, VT, N00,
11439 DAG.getConstant(lsb, DL, MVT::i32));
11440 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
11441 DAG.getConstant(Mask2, DL, MVT::i32));
11442 DCI.CombineTo(N, Res, false);
11443 // Return value from the original node to inform the combiner that N is
11444 // now dead.
11445 return SDValue(N, 0);
11446 }
11447 }
11448
11449 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
11450 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
11451 ARM::isBitFieldInvertedMask(~Mask)) {
11452 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
11453 // where lsb(mask) == #shamt and masked bits of B are known zero.
11454 SDValue ShAmt = N00.getOperand(1);
11455 unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
11456 unsigned LSB = countTrailingZeros(Mask);
11457 if (ShAmtC != LSB)
11458 return SDValue();
11459
11460 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
11461 DAG.getConstant(~Mask, DL, MVT::i32));
11462
11463 DCI.CombineTo(N, Res, false);
11464 // Return value from the original node to inform the combiner that N is
11465 // now dead.
11466 return SDValue(N, 0);
11467 }
11468
11469 return SDValue();
11470}
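// Worked example of case (1) above (illustrative): in
//   (or (and A, 0xffff00ff), 0x2400)
// the mask clears bits [15:8] and the value 0x2400 lies entirely within
// that field, so this becomes (ARMbfi A, 0x24, 0xffff00ff), inserting the
// constant 0x24 into bits [15:8] of A.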
11471
11472 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR
11473 static SDValue PerformORCombine(SDNode *N,
11474 TargetLowering::DAGCombinerInfo &DCI,
11475 const ARMSubtarget *Subtarget) {
11476 // Attempt to use immediate-form VORR
11477 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
11478 SDLoc dl(N);
11479 EVT VT = N->getValueType(0);
11480 SelectionDAG &DAG = DCI.DAG;
11481
11482 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
11483 return SDValue();
11484
11485 APInt SplatBits, SplatUndef;
11486 unsigned SplatBitSize;
11487 bool HasAnyUndefs;
11488 if (BVN && Subtarget->hasNEON() &&
11489 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
11490 if (SplatBitSize <= 64) {
11491 EVT VorrVT;
11492 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
11493 SplatUndef.getZExtValue(), SplatBitSize,
11494 DAG, dl, VorrVT, VT.is128BitVector(),
11495 OtherModImm);
11496 if (Val.getNode()) {
11497 SDValue Input =
11498 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
11499 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
11500 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
11501 }
11502 }
11503 }
11504
11505 if (!Subtarget->isThumb1Only()) {
11506 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
11507 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
11508 return Result;
11509 if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
11510 return Result;
11511 }
11512
11513 SDValue N0 = N->getOperand(0);
11514 SDValue N1 = N->getOperand(1);
11515
11516 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
11517 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
11518 DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
11519
11520 // The code below optimizes (or (and X, Y), Z).
11521 // The AND operand needs to have a single user to make these optimizations
11522 // profitable.
11523 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
11524 return SDValue();
11525
11526 APInt SplatUndef;
11527 unsigned SplatBitSize;
11528 bool HasAnyUndefs;
11529
11530 APInt SplatBits0, SplatBits1;
11531 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
11532 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
11533 // Ensure that the second operand of both ands are constants
11534 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
11535 HasAnyUndefs) && !HasAnyUndefs) {
11536 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
11537 HasAnyUndefs) && !HasAnyUndefs) {
11538 // Ensure that the bit width of the constants are the same and that
11539 // the splat arguments are logical inverses as per the pattern we
11540 // are trying to simplify.
11541 if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
11542 SplatBits0 == ~SplatBits1) {
11543 // Canonicalize the vector type to make instruction selection
11544 // simpler.
11545 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
11546 SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
11547 N0->getOperand(1),
11548 N0->getOperand(0),
11549 N1->getOperand(0));
11550 return DAG.getNode(ISD::BITCAST, dl, VT, Result);
11551 }
11552 }
11553 }
11554 }
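// Illustrative example for the VBSL path above (constants assumed): with
// A == splat(0x0000ffff), (or (and B, A), (and C, ~A)) selects the low
// halfword of each lane from B and the high halfword from C; the
// ARMISD::VBSL node expresses this as one vbsl instead of two vands and a
// vorr.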
11555
11556 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
11557 // reasonable.
11558 if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
11559 if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
11560 return Res;
11561 }
11562
11563 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
11564 return Result;
11565
11566 return SDValue();
11567}
11568
11569 static SDValue PerformXORCombine(SDNode *N,
11570 TargetLowering::DAGCombinerInfo &DCI,
11571 const ARMSubtarget *Subtarget) {
11572 EVT VT = N->getValueType(0);
11573 SelectionDAG &DAG = DCI.DAG;
11574
11575 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
11576 return SDValue();
11577
11578 if (!Subtarget->isThumb1Only()) {
11579 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor x, c))
11580 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
11581 return Result;
11582
11583 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
11584 return Result;
11585 }
11586
11587 return SDValue();
11588}
11589
11590// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
11591// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
11592// their position in "to" (Rd).
11593static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
11594 assert(N->getOpcode() == ARMISD::BFI);
11595
11596 SDValue From = N->getOperand(1);
11597 ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
11598 FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation());
11599
11600 // If the Base came from a SHR #C, we can deduce that it is really testing bit
11601 // #C in the base of the SHR.
11602 if (From->getOpcode() == ISD::SRL &&
11603 isa<ConstantSDNode>(From->getOperand(1))) {
11604 APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
11605 assert(Shift.getLimitedValue() < 32 && "Shift too large!");
11606 FromMask <<= Shift.getLimitedValue(31);
11607 From = From->getOperand(0);
11608 }
11609
11610 return From;
11611}
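// Illustrative example (values assumed, not from the original source): for
//   (ARMISD::BFI Rd, (srl Rn, 8), 0xff00ffff)
// the inverted third operand yields ToMask == 0x00ff0000, the initial
// FromMask is the low 8 bits (0x000000ff), and the SRL shifts it to
// 0x0000ff00; the returned "from" value is Rn.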
11612
11613// If A and B contain one contiguous set of bits, does A | B == A . B?
11614//
11615 // Neither A nor B may be zero.
11616static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
11617 unsigned LastActiveBitInA = A.countTrailingZeros();
11618 unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1;
11619 return LastActiveBitInA - 1 == FirstActiveBitInB;
11620}
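// Illustrative example: for A == 0b1100 and B == 0b0011, A's lowest set bit
// (bit 2) sits directly above B's highest set bit (bit 1), so the function
// returns true: A | B == 0b1111 is the concatenation A.B.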
11621
11622 static SDValue FindBFIToCombineWith(SDNode *N) {
11623 // We have a BFI in N. Follow a possible chain of BFIs and find a BFI it can combine with,
11624 // if one exists.
11625 APInt ToMask, FromMask;
11626 SDValue From = ParseBFI(N, ToMask, FromMask);
11627 SDValue To = N->getOperand(0);
11628
11629 // Now check for a compatible BFI to merge with. We can pass through BFIs that
11630 // aren't compatible, but not if they set the same bit in their destination as
11631 // we do (or that of any BFI we're going to combine with).
11632 SDValue V = To;
11633 APInt CombinedToMask = ToMask;
11634 while (V.getOpcode() == ARMISD::BFI) {
11635 APInt NewToMask, NewFromMask;
11636 SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
11637 if (NewFrom != From) {
11638 // This BFI has a different base. Keep going.
11639 CombinedToMask |= NewToMask;
11640 V = V.getOperand(0);
11641 continue;
11642 }
11643
11644 // Do the written bits conflict with any we've seen so far?
11645 if ((NewToMask & CombinedToMask).getBoolValue())
11646 // Conflicting bits - bail out because going further is unsafe.
11647 return SDValue();
11648
11649 // Are the new bits contiguous when combined with the old bits?
11650 if (BitsProperlyConcatenate(ToMask, NewToMask) &&
11651 BitsProperlyConcatenate(FromMask, NewFromMask))
11652 return V;
11653 if (BitsProperlyConcatenate(NewToMask, ToMask) &&
11654 BitsProperlyConcatenate(NewFromMask, FromMask))
11655 return V;
11656
11657 // We've seen a write to some bits, so track it.
11658 CombinedToMask |= NewToMask;
11659 // Keep going...
11660 V = V.getOperand(0);
11661 }
11662
11663 return SDValue();
11664}
11665
11666 static SDValue PerformBFICombine(SDNode *N,
11667 TargetLowering::DAGCombinerInfo &DCI) {
11668 SDValue N1 = N->getOperand(1);
11669 if (N1.getOpcode() == ISD::AND) {
11670 // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
11671 // the bits being cleared by the AND are not demanded by the BFI.
11672 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
11673 if (!N11C)
11674 return SDValue();
11675 unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
11676 unsigned LSB = countTrailingZeros(~InvMask);
11677 unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;
11678 assert(Width <
11679 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
11680 "undefined behavior");
11681 unsigned Mask = (1u << Width) - 1;
11682 unsigned Mask2 = N11C->getZExtValue();
11683 if ((Mask & (~Mask2)) == 0)
11684 return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
11685 N->getOperand(0), N1.getOperand(0),
11686 N->getOperand(2));
11687 } else if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
11688 // We have a BFI of a BFI. Walk up the BFI chain to see how long it goes.
11689 // Keep track of any consecutive bits set that all come from the same base
11690 // value. We can combine these together into a single BFI.
11691 SDValue CombineBFI = FindBFIToCombineWith(N);
11692 if (CombineBFI == SDValue())
11693 return SDValue();
11694
11695 // We've found a BFI.
11696 APInt ToMask1, FromMask1;
11697 SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
11698
11699 APInt ToMask2, FromMask2;
11700 SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
11701 assert(From1 == From2);
11702 (void)From2;
11703
11704 // First, unlink CombineBFI.
11705 DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0));
11706 // Then create a new BFI, combining the two together.
11707 APInt NewFromMask = FromMask1 | FromMask2;
11708 APInt NewToMask = ToMask1 | ToMask2;
11709
11710 EVT VT = N->getValueType(0);
11711 SDLoc dl(N);
11712
11713 if (NewFromMask[0] == 0)
11714 From1 = DCI.DAG.getNode(
11715 ISD::SRL, dl, VT, From1,
11716 DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT));
11717 return DCI.DAG.getNode(ARMISD::BFI, dl, VT, N->getOperand(0), From1,
11718 DCI.DAG.getConstant(~NewToMask, dl, VT));
11719 }
11720 return SDValue();
11721}
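// Illustrative example for the BFI-of-AND fold above (constants assumed): in
//   (ARMISD::BFI A, (and B, 0x000000ff), 0xfffffff0)
// the BFI inserts only the low 4 bits of its second operand (Width == 4),
// and the AND mask 0xff keeps all of those bits, so the AND is redundant and
// the node becomes (ARMISD::BFI A, B, 0xfffffff0).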
11722
11723/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
11724/// ARMISD::VMOVRRD.
11725 static SDValue PerformVMOVRRDCombine(SDNode *N,
11726 TargetLowering::DAGCombinerInfo &DCI,
11727 const ARMSubtarget *Subtarget) {
11728 // vmovrrd(vmovdrr x, y) -> x,y
11729 SDValue InDouble = N->getOperand(0);
11730 if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
11731 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
11732
11733 // vmovrrd(load f64) -> (load i32), (load i32)
11734 SDNode *InNode = InDouble.getNode();
11735 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
11736 InNode->getValueType(0) == MVT::f64 &&
11737 InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
11738 !cast<LoadSDNode>(InNode)->isVolatile()) {
11739 // TODO: Should this be done for non-FrameIndex operands?
11740 LoadSDNode *LD = cast<LoadSDNode>(InNode);
11741
11742 SelectionDAG &DAG = DCI.DAG;
11743 SDLoc DL(LD);
11744 SDValue BasePtr = LD->getBasePtr();
11745 SDValue NewLD1 =
11746 DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
11747 LD->getAlignment(), LD->getMemOperand()->getFlags());
11748
11749 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
11750 DAG.getConstant(4, DL, MVT::i32));
11751
11752 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
11753 LD->getPointerInfo().getWithOffset(4),
11754 std::min(4U, LD->getAlignment()),
11755 LD->getMemOperand()->getFlags());
11756
11757 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
11758 if (DCI.DAG.getDataLayout().isBigEndian())
11759 std::swap (NewLD1, NewLD2);
11760 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
11761 return Result;
11762 }
11763
11764 return SDValue();
11765}
11766
11767/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
11768/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
11769 static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
11770 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
11771 SDValue Op0 = N->getOperand(0);
11772 SDValue Op1 = N->getOperand(1);
11773 if (Op0.getOpcode() == ISD::BITCAST)
11774 Op0 = Op0.getOperand(0);
11775 if (Op1.getOpcode() == ISD::BITCAST)
11776 Op1 = Op1.getOperand(0);
11777 if (Op0.getOpcode() == ARMISD::VMOVRRD &&
11778 Op0.getNode() == Op1.getNode() &&
11779 Op0.getResNo() == 0 && Op1.getResNo() == 1)
11780 return DAG.getNode(ISD::BITCAST, SDLoc(N),
11781 N->getValueType(0), Op0.getOperand(0));
11782 return SDValue();
11783}
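// Illustrative example: if an f64 value X is split into two GPRs by
// (ARMISD::VMOVRRD X) and immediately reassembled by VMOVDRR, the combine
// above collapses the round trip to (bitcast X), so no register moves are
// emitted.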
11784
11785/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
11786/// are normal, non-volatile loads. If so, it is profitable to bitcast an
11787/// i64 vector to have f64 elements, since the value can then be loaded
11788/// directly into a VFP register.
11789 static bool hasNormalLoadOperand(SDNode *N) {
11790 unsigned NumElts = N->getValueType(0).getVectorNumElements();
11791 for (unsigned i = 0; i < NumElts; ++i) {
11792 SDNode *Elt = N->getOperand(i).getNode();
11793 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
11794 return true;
11795 }
11796 return false;
11797}
11798
11799/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
11800/// ISD::BUILD_VECTOR.
11801 static SDValue PerformBUILD_VECTORCombine(SDNode *N,
11802 TargetLowering::DAGCombinerInfo &DCI,
11803 const ARMSubtarget *Subtarget) {
11804 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
11805 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
11806 // into a pair of GPRs, which is fine when the value is used as a scalar,
11807 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
11808 SelectionDAG &DAG = DCI.DAG;
11809 if (N->getNumOperands() == 2)
11810 if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
11811 return RV;
11812
11813 // Load i64 elements as f64 values so that type legalization does not split
11814 // them up into i32 values.
11815 EVT VT = N->getValueType(0);
11816 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
11817 return SDValue();
11818 SDLoc dl(N);
11819 SmallVector<SDValue, 8> Ops;
11820 unsigned NumElts = VT.getVectorNumElements();
11821 for (unsigned i = 0; i < NumElts; ++i) {
11822 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
11823 Ops.push_back(V);
11824 // Make the DAGCombiner fold the bitcast.
11825 DCI.AddToWorklist(V.getNode());
11826 }
11827 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
11828 SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
11829 return DAG.getNode(ISD::BITCAST, dl, VT, BV);
11830}
11831
11832/// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
11833static SDValue
11834 PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
11835 // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
11836 // At that time, we may have inserted bitcasts from integer to float.
11837 // If these bitcasts have survived DAGCombine, change the lowering of this
11838 // BUILD_VECTOR into something more vector friendly, i.e., something that
11839 // does not force the use of floating point types.
11840
11841 // Make sure we can change the type of the vector.
11842 // This is possible iff:
11843 // 1. The vector is only used in a bitcast to an integer type. I.e.,
11844 // 1.1. Vector is used only once.
11845 // 1.2. Use is a bit convert to an integer type.
11846 // 2. The size of its operands are 32-bits (64-bits are not legal).
11847 EVT VT = N->getValueType(0);
11848 EVT EltVT = VT.getVectorElementType();
11849
11850 // Check 1.1. and 2.
11851 if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
11852 return SDValue();
11853
11854 // By construction, the input type must be float.
11855 assert(EltVT == MVT::f32 && "Unexpected type!");
11856
11857 // Check 1.2.
11858 SDNode *Use = *N->use_begin();
11859 if (Use->getOpcode() != ISD::BITCAST ||
11860 Use->getValueType(0).isFloatingPoint())
11861 return SDValue();
11862
11863 // Check profitability.
11864 // The model is: if more than half of the relevant operands are bitcast from
11865 // i32, turn the build_vector into a sequence of insert_vector_elt.
11866 // Relevant operands are everything that is not statically
11867 // (i.e., at compile time) bitcasted.
11868 unsigned NumOfBitCastedElts = 0;
11869 unsigned NumElts = VT.getVectorNumElements();
11870 unsigned NumOfRelevantElts = NumElts;
11871 for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
11872 SDValue Elt = N->getOperand(Idx);
11873 if (Elt->getOpcode() == ISD::BITCAST) {
11874 // Assume only bit cast to i32 will go away.
11875 if (Elt->getOperand(0).getValueType() == MVT::i32)
11876 ++NumOfBitCastedElts;
11877 } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
11878 // Constants are statically cast, thus do not count them as
11879 // relevant operands.
11880 --NumOfRelevantElts;
11881 }
11882
11883 // Check if more than half of the elements require a non-free bitcast.
11884 if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
11885 return SDValue();
11886
11887 SelectionDAG &DAG = DCI.DAG;
11888 // Create the new vector type.
11889 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
11890 // Check if the type is legal.
11891 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11892 if (!TLI.isTypeLegal(VecVT))
11893 return SDValue();
11894
11895 // Combine:
11896 // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
11897 // => BITCAST INSERT_VECTOR_ELT
11898 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
11899 // (BITCAST EN), N.
11900 SDValue Vec = DAG.getUNDEF(VecVT);
11901 SDLoc dl(N);
11902 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
11903 SDValue V = N->getOperand(Idx);
11904 if (V.isUndef())
11905 continue;
11906 if (V.getOpcode() == ISD::BITCAST &&
11907 V->getOperand(0).getValueType() == MVT::i32)
11908 // Fold obvious case.
11909 V = V.getOperand(0);
11910 else {
11911 V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
11912 // Make the DAGCombiner fold the bitcasts.
11913 DCI.AddToWorklist(V.getNode());
11914 }
11915 SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
11916 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
11917 }
11918 Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
11919 // Make the DAGCombiner fold the bitcasts.
11920 DCI.AddToWorklist(Vec.getNode());
11921 return Vec;
11922}
11923
11924/// PerformInsertEltCombine - Target-specific dag combine xforms for
11925/// ISD::INSERT_VECTOR_ELT.
11926 static SDValue PerformInsertEltCombine(SDNode *N,
11927 TargetLowering::DAGCombinerInfo &DCI) {
11928 // Bitcast an i64 load inserted into a vector to f64.
11929 // Otherwise, the i64 value will be legalized to a pair of i32 values.
11930 EVT VT = N->getValueType(0);
11931 SDNode *Elt = N->getOperand(1).getNode();
11932 if (VT.getVectorElementType() != MVT::i64 ||
11933 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
11934 return SDValue();
11935
11936 SelectionDAG &DAG = DCI.DAG;
11937 SDLoc dl(N);
11938 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
11939 VT.getVectorNumElements());
11940 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
11941 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
11942 // Make the DAGCombiner fold the bitcasts.
11943 DCI.AddToWorklist(Vec.getNode());
11944 DCI.AddToWorklist(V.getNode());
11945 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
11946 Vec, V, N->getOperand(2));
11947 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
11948}
11949
11950/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
11951/// ISD::VECTOR_SHUFFLE.
11952 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
11953 // The LLVM shufflevector instruction does not require the shuffle mask
11954 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
11955 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
11956 // operands do not match the mask length, they are extended by concatenating
11957 // them with undef vectors. That is probably the right thing for other
11958 // targets, but for NEON it is better to concatenate two double-register
11959 // size vector operands into a single quad-register size vector. Do that
11960 // transformation here:
11961 // shuffle(concat(v1, undef), concat(v2, undef)) ->
11962 // shuffle(concat(v1, v2), undef)
11963 SDValue Op0 = N->getOperand(0);
11964 SDValue Op1 = N->getOperand(1);
11965 if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
11966 Op1.getOpcode() != ISD::CONCAT_VECTORS ||
11967 Op0.getNumOperands() != 2 ||
11968 Op1.getNumOperands() != 2)
11969 return SDValue();
11970 SDValue Concat0Op1 = Op0.getOperand(1);
11971 SDValue Concat1Op1 = Op1.getOperand(1);
11972 if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
11973 return SDValue();
11974 // Skip the transformation if any of the types are illegal.
11975 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11976 EVT VT = N->getValueType(0);
11977 if (!TLI.isTypeLegal(VT) ||
11978 !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
11979 !TLI.isTypeLegal(Concat1Op1.getValueType()))
11980 return SDValue();
11981
11982 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
11983 Op0.getOperand(0), Op1.getOperand(0));
11984 // Translate the shuffle mask.
11985 SmallVector<int, 16> NewMask;
11986 unsigned NumElts = VT.getVectorNumElements();
11987 unsigned HalfElts = NumElts/2;
11988 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
11989 for (unsigned n = 0; n < NumElts; ++n) {
11990 int MaskElt = SVN->getMaskElt(n);
11991 int NewElt = -1;
11992 if (MaskElt < (int)HalfElts)
11993 NewElt = MaskElt;
11994 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
11995 NewElt = HalfElts + MaskElt - NumElts;
11996 NewMask.push_back(NewElt);
11997 }
11998 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
11999 DAG.getUNDEF(VT), NewMask);
12000}
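// Illustrative example (types assumed): with v1 and v2 of type v2i32,
//   shuffle(concat(v1, undef), concat(v2, undef), <0,1,4,5>)
// becomes shuffle(concat(v1, v2), undef, <0,1,2,3>), keeping both inputs in
// one quad register instead of two half-undef ones.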
12001
12002/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
12003/// NEON load/store intrinsics, and generic vector load/stores, to merge
12004/// base address updates.
12005/// For generic load/stores, the memory type is assumed to be a vector.
12006/// The caller is assumed to have checked legality.
12007 static SDValue CombineBaseUpdate(SDNode *N,
12008 TargetLowering::DAGCombinerInfo &DCI) {
12009 SelectionDAG &DAG = DCI.DAG;
12010 const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
12011 N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
12012 const bool isStore = N->getOpcode() == ISD::STORE;
12013 const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
12014 SDValue Addr = N->getOperand(AddrOpIdx);
12015 MemSDNode *MemN = cast<MemSDNode>(N);
12016 SDLoc dl(N);
12017
12018 // Search for a use of the address operand that is an increment.
12019 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
12020 UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
12021 SDNode *User = *UI;
12022 if (User->getOpcode() != ISD::ADD ||
12023 UI.getUse().getResNo() != Addr.getResNo())
12024 continue;
12025
12026 // Check that the add is independent of the load/store. Otherwise, folding
12027 // it would create a cycle. We can avoid searching through Addr as it's a
12028 // predecessor to both.
12029 SmallPtrSet<const SDNode *, 32> Visited;
12030 SmallVector<const SDNode *, 16> Worklist;
12031 Visited.insert(Addr.getNode());
12032 Worklist.push_back(N);
12033 Worklist.push_back(User);
12034 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
12035 SDNode::hasPredecessorHelper(User, Visited, Worklist))
12036 continue;
12037
12038 // Find the new opcode for the updating load/store.
12039 bool isLoadOp = true;
12040 bool isLaneOp = false;
12041 unsigned NewOpc = 0;
12042 unsigned NumVecs = 0;
12043 if (isIntrinsic) {
12044 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
12045 switch (IntNo) {
12046 default: llvm_unreachable("unexpected intrinsic for Neon base update");
12047 case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
12048 NumVecs = 1; break;
12049 case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
12050 NumVecs = 2; break;
12051 case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD;
12052 NumVecs = 3; break;
12053 case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
12054 NumVecs = 4; break;
12055 case Intrinsic::arm_neon_vld2dup:
12056 case Intrinsic::arm_neon_vld3dup:
12057 case Intrinsic::arm_neon_vld4dup:
12058 // TODO: Support updating VLDxDUP nodes. For now, we just skip
12059 // combining base updates for such intrinsics.
12060 continue;
12061 case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
12062 NumVecs = 2; isLaneOp = true; break;
12063 case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
12064 NumVecs = 3; isLaneOp = true; break;
12065 case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
12066 NumVecs = 4; isLaneOp = true; break;
12067 case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
12068 NumVecs = 1; isLoadOp = false; break;
12069 case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
12070 NumVecs = 2; isLoadOp = false; break;
12071 case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
12072 NumVecs = 3; isLoadOp = false; break;
12073 case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
12074 NumVecs = 4; isLoadOp = false; break;
12075 case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
12076 NumVecs = 2; isLoadOp = false; isLaneOp = true; break;
12077 case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
12078 NumVecs = 3; isLoadOp = false; isLaneOp = true; break;
12079 case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
12080 NumVecs = 4; isLoadOp = false; isLaneOp = true; break;
12081 }
12082 } else {
12083 isLaneOp = true;
12084 switch (N->getOpcode()) {
12085 default: llvm_unreachable("unexpected opcode for Neon base update");
12086 case ARMISD::VLD1DUP: NewOpc = ARMISD::VLD1DUP_UPD; NumVecs = 1; break;
12087 case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
12088 case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
12089 case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
12090 case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD;
12091 NumVecs = 1; isLaneOp = false; break;
12092 case ISD::STORE: NewOpc = ARMISD::VST1_UPD;
12093 NumVecs = 1; isLaneOp = false; isLoadOp = false; break;
12094 }
12095 }
12096
12097 // Find the size of memory referenced by the load/store.
12098 EVT VecTy;
12099 if (isLoadOp) {
12100 VecTy = N->getValueType(0);
12101 } else if (isIntrinsic) {
12102 VecTy = N->getOperand(AddrOpIdx+1).getValueType();
12103 } else {
12104 assert(isStore && "Node has to be a load, a store, or an intrinsic!");
12105 VecTy = N->getOperand(1).getValueType();
12106 }
12107
12108 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
12109 if (isLaneOp)
12110 NumBytes /= VecTy.getVectorNumElements();
12111
12112 // If the increment is a constant, it must match the memory ref size.
12113 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
12114 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
12115 if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) {
12116 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
12117 // separate instructions that make it harder to use a non-constant update.
12118 continue;
12119 }
12120
12121 // OK, we found an ADD we can fold into the base update.
12122 // Now, create a _UPD node, taking care of not breaking alignment.
12123
12124 EVT AlignedVecTy = VecTy;
12125 unsigned Alignment = MemN->getAlignment();
12126
12127 // If this is a less-than-standard-aligned load/store, change the type to
12128 // match the standard alignment.
12129 // The alignment is overlooked when selecting _UPD variants; and it's
12130 // easier to introduce bitcasts here than fix that.
12131 // There are 3 ways to get to this base-update combine:
12132 // - intrinsics: they are assumed to be properly aligned (to the standard
12133 // alignment of the memory type), so we don't need to do anything.
12134 // - ARMISD::VLDx nodes: they are only generated from the aforementioned
12135 // intrinsics, so, likewise, there's nothing to do.
12136 // - generic load/store instructions: the alignment is specified as an
12137 // explicit operand, rather than implicitly as the standard alignment
12138 // of the memory type (like the intrinsics). We need to change the
12139 // memory type to match the explicit alignment. That way, we don't
12140 // generate non-standard-aligned ARMISD::VLDx nodes.
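// Illustrative example (values assumed): a generic v2i64 load whose MMO only
// guarantees 4-byte alignment is re-typed here as v4i32, so the VLD1_UPD
// node created below never claims stricter alignment than the memory operand
// provides.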
12141 if (isa<LSBaseSDNode>(N)) {
12142 if (Alignment == 0)
12143 Alignment = 1;
12144 if (Alignment < VecTy.getScalarSizeInBits() / 8) {
12145 MVT EltTy = MVT::getIntegerVT(Alignment * 8);
12146 assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
12147 assert(!isLaneOp && "Unexpected generic load/store lane.");
12148 unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
12149 AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
12150 }
12151 // Don't set an explicit alignment on regular load/stores that we want
12152 // to transform to VLD/VST 1_UPD nodes.
12153 // This matches the behavior of regular load/stores, which only get an
12154 // explicit alignment if the MMO alignment is larger than the standard
12155 // alignment of the memory type.
12156 // Intrinsics, however, always get an explicit alignment, set to the
12157 // alignment of the MMO.
12158 Alignment = 1;
12159 }
12160
12161 // Create the new updating load/store node.
12162 // First, create an SDVTList for the new updating node's results.
12163 EVT Tys[6];
12164 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
12165 unsigned n;
12166 for (n = 0; n < NumResultVecs; ++n)
12167 Tys[n] = AlignedVecTy;
12168 Tys[n++] = MVT::i32;
12169 Tys[n] = MVT::Other;
12170 SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
12171
12172 // Then, gather the new node's operands.
12173 SmallVector<SDValue, 8> Ops;
12174 Ops.push_back(N->getOperand(0)); // incoming chain
12175 Ops.push_back(N->getOperand(AddrOpIdx));
12176 Ops.push_back(Inc);
12177
12178 if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
12179 // Try to match the intrinsic's signature
12180 Ops.push_back(StN->getValue());
12181 } else {
12182 // Loads (and of course intrinsics) match the intrinsics' signature,
12183 // so just add all but the alignment operand.
12184 for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands() - 1; ++i)
12185 Ops.push_back(N->getOperand(i));
12186 }
12187
12188 // For all node types, the alignment operand is always the last one.
12189 Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32));
12190
12191 // If this is a non-standard-aligned STORE, the penultimate operand is the
12192 // stored value. Bitcast it to the aligned type.
12193 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
12194 SDValue &StVal = Ops[Ops.size()-2];
12195 StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
12196 }
12197
12198 EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
12199 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
12200 MemN->getMemOperand());
12201
12202 // Update the uses.
12203 SmallVector<SDValue, 5> NewResults;
12204 for (unsigned i = 0; i < NumResultVecs; ++i)
12205 NewResults.push_back(SDValue(UpdN.getNode(), i));
12206
12207 // If this is a non-standard-aligned LOAD, the first result is the loaded
12208 // value. Bitcast it to the expected result type.
12209 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
12210 SDValue &LdVal = NewResults[0];
12211 LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
12212 }
12213
12214 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
12215 DCI.CombineTo(N, NewResults);
12216 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
12217
12218 break;
12219 }
12220 return SDValue();
12221}
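// Illustrative example: a load of a v4i32 whose address is also consumed by
// (add addr, 16), 16 being the size of the access, is merged above into one
// ARMISD::VLD1_UPD node that produces both the loaded value and the
// incremented address, selecting to "vld1.32 {d16, d17}, [r0]!".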
12222
12223 static SDValue PerformVLDCombine(SDNode *N,
12224 TargetLowering::DAGCombinerInfo &DCI) {
12225 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
12226 return SDValue();
12227
12228 return CombineBaseUpdate(N, DCI);
12229}
12230
12231/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
12232/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
12233/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
12234/// return true.
12235 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
12236 SelectionDAG &DAG = DCI.DAG;
12237 EVT VT = N->getValueType(0);
12238 // vldN-dup instructions only support 64-bit vectors for N > 1.
12239 if (!VT.is64BitVector())
12240 return false;
12241
12242 // Check if the VDUPLANE operand is a vldN-dup intrinsic.
12243 SDNode *VLD = N->getOperand(0).getNode();
12244 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
12245 return false;
12246 unsigned NumVecs = 0;
12247 unsigned NewOpc = 0;
12248 unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
12249 if (IntNo == Intrinsic::arm_neon_vld2lane) {
12250 NumVecs = 2;
12251 NewOpc = ARMISD::VLD2DUP;
12252 } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
12253 NumVecs = 3;
12254 NewOpc = ARMISD::VLD3DUP;
12255 } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
12256 NumVecs = 4;
12257 NewOpc = ARMISD::VLD4DUP;
12258 } else {
12259 return false;
12260 }
12261
12262 // First check that all the vldN-lane uses are VDUPLANEs and that the lane
12263 // numbers match the load.
12264 unsigned VLDLaneNo =
12265 cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
12266 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
12267 UI != UE; ++UI) {
12268 // Ignore uses of the chain result.
12269 if (UI.getUse().getResNo() == NumVecs)
12270 continue;
12271 SDNode *User = *UI;
12272 if (User->getOpcode() != ARMISD::VDUPLANE ||
12273 VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
12274 return false;
12275 }
12276
12277 // Create the vldN-dup node.
12278 EVT Tys[5];
12279 unsigned n;
12280 for (n = 0; n < NumVecs; ++n)
12281 Tys[n] = VT;
12282 Tys[n] = MVT::Other;
12283 SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1));
12284 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
12285 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
12286 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
12287 Ops, VLDMemInt->getMemoryVT(),
12288 VLDMemInt->getMemOperand());
12289
12290 // Update the uses.
12291 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
12292 UI != UE; ++UI) {
12293 unsigned ResNo = UI.getUse().getResNo();
12294 // Ignore uses of the chain result.
12295 if (ResNo == NumVecs)
12296 continue;
12297 SDNode *User = *UI;
12298 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
12299 }
12300
12301 // Now the vldN-lane intrinsic is dead except for its chain result.
12302 // Update uses of the chain.
12303 std::vector<SDValue> VLDDupResults;
12304 for (unsigned n = 0; n < NumVecs; ++n)
12305 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
12306 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
12307 DCI.CombineTo(VLD, VLDDupResults);
12308
12309 return true;
12310}
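// Illustrative example: if every vector result of an arm.neon.vld2lane
// intrinsic feeds only VDUPLANE nodes broadcasting the loaded lane, the
// whole group is rewritten to a VLD2DUP, i.e. one all-lanes load such as
// "vld2.16 {d16[], d17[]}, [r0]".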
12311
12312/// PerformVDUPLANECombine - Target-specific dag combine xforms for
12313/// ARMISD::VDUPLANE.
12314 static SDValue PerformVDUPLANECombine(SDNode *N,
12315 TargetLowering::DAGCombinerInfo &DCI) {
12316 SDValue Op = N->getOperand(0);
12317
12318 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
12319 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
12320 if (CombineVLDDUP(N, DCI))
12321 return SDValue(N, 0);
12322
12323 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
12324 // redundant. Ignore bit_converts for now; element sizes are checked below.
12325 while (Op.getOpcode() == ISD::BITCAST)
12326 Op = Op.getOperand(0);
12327 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
12328 return SDValue();
12329
12330 // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
12331 unsigned EltSize = Op.getScalarValueSizeInBits();
12332 // The canonical VMOV for a zero vector uses a 32-bit element size.
12333 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
12334 unsigned EltBits;
12335 if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
12336 EltSize = 8;
12337 EVT VT = N->getValueType(0);
12338 if (EltSize > VT.getScalarSizeInBits())
12339 return SDValue();
12340
12341 return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
12342}
12343
12344/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
12345 static SDValue PerformVDUPCombine(SDNode *N,
12346 TargetLowering::DAGCombinerInfo &DCI,
12347 const ARMSubtarget *Subtarget) {
12348 SelectionDAG &DAG = DCI.DAG;
12349 SDValue Op = N->getOperand(0);
12350
12351 if (!Subtarget->hasNEON())
12352 return SDValue();
12353
12354 // Match VDUP(LOAD) -> VLD1DUP.
12355 // We match this pattern here rather than waiting for isel because the
12356 // transform is only legal for unindexed loads.
12357 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
12358 if (LD && Op.hasOneUse() && LD->isUnindexed() &&
12359 LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
12360 SDValue Ops[] = { LD->getOperand(0), LD->getOperand(1),
12361 DAG.getConstant(LD->getAlignment(), SDLoc(N), MVT::i32) };
12362 SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
12363 SDValue VLDDup = DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys,
12364 Ops, LD->getMemoryVT(),
12365 LD->getMemOperand());
12366 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
12367 return VLDDup;
12368 }
12369
12370 return SDValue();
12371}
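// Illustrative example: (ARMISD::VDUP (load i32)) with a single-use,
// unindexed load becomes one ARMISD::VLD1DUP node, selecting to
// "vld1.32 {d16[]}, [r0]" instead of an ldr followed by a vdup.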
12372
12373 static SDValue PerformLOADCombine(SDNode *N,
12374 TargetLowering::DAGCombinerInfo &DCI) {
12375 EVT VT = N->getValueType(0);
12376
12377 // If this is a legal vector load, try to combine it into a VLD1_UPD.
12378 if (ISD::isNormalLoad(N) && VT.isVector() &&
12379 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
12380 return CombineBaseUpdate(N, DCI);
12381
12382 return SDValue();
12383}
12384
12385/// PerformSTORECombine - Target-specific dag combine xforms for
12386/// ISD::STORE.
12387 static SDValue PerformSTORECombine(SDNode *N,
12388 TargetLowering::DAGCombinerInfo &DCI) {
12389 StoreSDNode *St = cast<StoreSDNode>(N);
12390 if (St->isVolatile())
12391 return SDValue();
12392
12393 // Optimize trunc store (of multiple scalars) to shuffle and store. First,
12394 // pack all of the elements in one place. Next, store to memory in fewer
12395 // chunks.
12396 SDValue StVal = St->getValue();
12397 EVT VT = StVal.getValueType();
12398 if (St->isTruncatingStore() && VT.isVector()) {
12399 SelectionDAG &DAG = DCI.DAG;
12400 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12401 EVT StVT = St->getMemoryVT();
12402 unsigned NumElems = VT.getVectorNumElements();
12403 assert(StVT != VT && "Cannot truncate to the same type");
12404 unsigned FromEltSz = VT.getScalarSizeInBits();
12405 unsigned ToEltSz = StVT.getScalarSizeInBits();
12406
12407 // The From and To sizes and the element count must be powers of two.
12408 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
12409
12410 // We are going to use the original vector elt for storing.
12411 // Accumulated smaller vector elements must be a multiple of the store size.
12412 if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
12413
12414 unsigned SizeRatio = FromEltSz / ToEltSz;
12415 assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
12416
12417 // Create a type on which we perform the shuffle.
12418 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
12419 NumElems*SizeRatio);
12420 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
12421
12422 SDLoc DL(St);
12423 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
12424 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
12425 for (unsigned i = 0; i < NumElems; ++i)
12426 ShuffleVec[i] = DAG.getDataLayout().isBigEndian()
12427 ? (i + 1) * SizeRatio - 1
12428 : i * SizeRatio;
12429
12430 // Can't shuffle using an illegal type.
12431 if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
12432
12433 SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
12434 DAG.getUNDEF(WideVec.getValueType()),
12435 ShuffleVec);
12436 // At this point all of the data is stored at the bottom of the
12437 // register. We now need to save it to mem.
12438
12439 // Find the largest store unit
12440 MVT StoreType = MVT::i8;
12441 for (MVT Tp : MVT::integer_valuetypes()) {
12442 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
12443 StoreType = Tp;
12444 }
12445 // Didn't find a legal store type.
12446 if (!TLI.isTypeLegal(StoreType))
12447 return SDValue();
12448
12449 // Bitcast the original vector into a vector of store-size units
12450 EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
12451 StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
12452 assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
12453 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
12454 SmallVector<SDValue, 8> Chains;
12455 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
12456 TLI.getPointerTy(DAG.getDataLayout()));
12457 SDValue BasePtr = St->getBasePtr();
12458
12459 // Perform one or more big stores into memory.
12460 unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
12461 for (unsigned I = 0; I < E; I++) {
12462 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
12463 StoreType, ShuffWide,
12464 DAG.getIntPtrConstant(I, DL));
12465 SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
12466 St->getPointerInfo(), St->getAlignment(),
12467 St->getMemOperand()->getFlags());
12468 BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
12469 Increment);
12470 Chains.push_back(Ch);
12471 }
12472 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
12473 }
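// Illustrative example for the truncating-store path above (types assumed):
// a truncating store of v4i32 as v4i8 becomes a v16i8 shuffle with mask
// <0,4,8,12,...> that packs the four low bytes together, followed by a
// single i32 store instead of four separate byte stores.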
12474
12475 if (!ISD::isNormalStore(St))
12476 return SDValue();
12477
12478 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
12479 // ARM stores of arguments in the same cache line.
12480 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
12481 StVal.getNode()->hasOneUse()) {
12482 SelectionDAG &DAG = DCI.DAG;
12483 bool isBigEndian = DAG.getDataLayout().isBigEndian();
12484 SDLoc DL(St);
12485 SDValue BasePtr = St->getBasePtr();
12486 SDValue NewST1 = DAG.getStore(
12487 St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
12488 BasePtr, St->getPointerInfo(), St->getAlignment(),
12489 St->getMemOperand()->getFlags());
12490
12491 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
12492 DAG.getConstant(4, DL, MVT::i32));
12493 return DAG.getStore(NewST1.getValue(0), DL,
12494 StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
12495 OffsetPtr, St->getPointerInfo(),
12496 std::min(4U, St->getAlignment() / 2),
12497 St->getMemOperand()->getFlags());
12498 }
12499
12500 if (StVal.getValueType() == MVT::i64 &&
12501 StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
12502
12503 // Bitcast an i64 store extracted from a vector to f64.
12504 // Otherwise, the i64 value will be legalized to a pair of i32 values.
12505 SelectionDAG &DAG = DCI.DAG;
12506 SDLoc dl(StVal);
12507 SDValue IntVec = StVal.getOperand(0);
12508 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
12509 IntVec.getValueType().getVectorNumElements());
12510 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
12511 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
12512 Vec, StVal.getOperand(1));
12513 dl = SDLoc(N);
12514 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
12515 // Make the DAGCombiner fold the bitcasts.
12516 DCI.AddToWorklist(Vec.getNode());
12517 DCI.AddToWorklist(ExtElt.getNode());
12518 DCI.AddToWorklist(V.getNode());
12519 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
12520 St->getPointerInfo(), St->getAlignment(),
12521 St->getMemOperand()->getFlags(), St->getAAInfo());
12522 }
12523
12524 // If this is a legal vector store, try to combine it into a VST1_UPD.
12525 if (ISD::isNormalStore(N) && VT.isVector() &&
12526 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
12527 return CombineBaseUpdate(N, DCI);
12528
12529 return SDValue();
12530}
12531
12532/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
12533/// can replace combinations of VMUL and VCVT (floating-point to integer)
12534/// when the VMUL has a constant operand that is a power of 2.
12535///
12536/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
12537/// vmul.f32 d16, d17, d16
12538/// vcvt.s32.f32 d16, d16
12539/// becomes:
12540/// vcvt.s32.f32 d16, d16, #3
12541 static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
12542 const ARMSubtarget *Subtarget) {
12543 if (!Subtarget->hasNEON())
12544 return SDValue();
12545
12546 SDValue Op = N->getOperand(0);
12547 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
12548 Op.getOpcode() != ISD::FMUL)
12549 return SDValue();
12550
12551 SDValue ConstVec = Op->getOperand(1);
12552 if (!isa<BuildVectorSDNode>(ConstVec))
12553 return SDValue();
12554
12555 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
12556 uint32_t FloatBits = FloatTy.getSizeInBits();
12557 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
12558 uint32_t IntBits = IntTy.getSizeInBits();
12559 unsigned NumLanes = Op.getValueType().getVectorNumElements();
12560 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
12561 // These instructions only exist converting from f32 to i32. We can handle
12562 // smaller integers by generating an extra truncate, but larger ones would
12563 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
12564 // these instructions only support v2i32/v4i32 types.
12565 return SDValue();
12566 }
12567
12568 BitVector UndefElements;
12569 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
12570 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
12571 if (C == -1 || C == 0 || C > 32)
12572 return SDValue();
12573
12574 SDLoc dl(N);
12575 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
12576 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
12577 Intrinsic::arm_neon_vcvtfp2fxu;
12578 SDValue FixConv = DAG.getNode(
12579 ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
12580 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
12581 DAG.getConstant(C, dl, MVT::i32));
12582
12583 if (IntBits < FloatBits)
12584 FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
12585
12586 return FixConv;
12587}
12588
12589/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
12590/// can replace combinations of VCVT (integer to floating-point) and VDIV
12591/// when the VDIV has a constant operand that is a power of 2.
12592///
12593/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
12594/// vcvt.f32.s32 d16, d16
12595/// vdiv.f32 d16, d17, d16
12596/// becomes:
12597/// vcvt.f32.s32 d16, d16, #3
12598 static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
12599 const ARMSubtarget *Subtarget) {
12600 if (!Subtarget->hasNEON())
12601 return SDValue();
12602
12603 SDValue Op = N->getOperand(0);
12604 unsigned OpOpcode = Op.getNode()->getOpcode();
12605 if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
12606 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
12607 return SDValue();
12608
12609 SDValue ConstVec = N->getOperand(1);
12610 if (!isa<BuildVectorSDNode>(ConstVec))
12611 return SDValue();
12612
12613 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
12614 uint32_t FloatBits = FloatTy.getSizeInBits();
12615 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
12616 uint32_t IntBits = IntTy.getSizeInBits();
12617 unsigned NumLanes = Op.getValueType().getVectorNumElements();
12618 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
12619 // These instructions only exist converting from i32 to f32. We can handle
12620 // smaller integers by generating an extra extend, but larger ones would
12621 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
12622 // these instructions only support v2i32/v4i32 types.
12623 return SDValue();
12624 }
12625
12626 BitVector UndefElements;
12627 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
12628 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
12629 if (C == -1 || C == 0 || C > 32)
12630 return SDValue();
12631
12632 SDLoc dl(N);
12633 bool isSigned = OpOpcode == ISD::SINT_TO_FP;
12634 SDValue ConvInput = Op.getOperand(0);
12635 if (IntBits < FloatBits)
12636 ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
12637 dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
12638 ConvInput);
12639
12640 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
12641 Intrinsic::arm_neon_vcvtfxu2fp;
12642 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
12643 Op.getValueType(),
12644 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
12645 ConvInput, DAG.getConstant(C, dl, MVT::i32));
12646}
12647
12648/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
12649 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
12650 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
12651 switch (IntNo) {
12652 default:
12653 // Don't do anything for most intrinsics.
12654 break;
12655
12656 // Vector shifts: check for immediate versions and lower them.
12657 // Note: This is done during DAG combining instead of DAG legalizing because
12658 // the build_vectors for 64-bit vector element shift counts are generally
12659 // not legal, and it is hard to see their values after they get legalized to
12660 // loads from a constant pool.
12661 case Intrinsic::arm_neon_vshifts:
12662 case Intrinsic::arm_neon_vshiftu:
12663 case Intrinsic::arm_neon_vrshifts:
12664 case Intrinsic::arm_neon_vrshiftu:
12665 case Intrinsic::arm_neon_vrshiftn:
12666 case Intrinsic::arm_neon_vqshifts:
12667 case Intrinsic::arm_neon_vqshiftu:
12668 case Intrinsic::arm_neon_vqshiftsu:
12669 case Intrinsic::arm_neon_vqshiftns:
12670 case Intrinsic::arm_neon_vqshiftnu:
12671 case Intrinsic::arm_neon_vqshiftnsu:
12672 case Intrinsic::arm_neon_vqrshiftns:
12673 case Intrinsic::arm_neon_vqrshiftnu:
12674 case Intrinsic::arm_neon_vqrshiftnsu: {
12675 EVT VT = N->getOperand(1).getValueType();
12676 int64_t Cnt;
12677 unsigned VShiftOpc = 0;
12678
12679 switch (IntNo) {
12680 case Intrinsic::arm_neon_vshifts:
12681 case Intrinsic::arm_neon_vshiftu:
12682 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
12683 VShiftOpc = ARMISD::VSHLIMM;
12684 break;
12685 }
12686 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
12687 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
12688 : ARMISD::VSHRuIMM);
12689 break;
12690 }
12691 return SDValue();
12692
12693 case Intrinsic::arm_neon_vrshifts:
12694 case Intrinsic::arm_neon_vrshiftu:
12695 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
12696 break;
12697 return SDValue();
12698
12699 case Intrinsic::arm_neon_vqshifts:
12700 case Intrinsic::arm_neon_vqshiftu:
12701 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
12702 break;
12703 return SDValue();
12704
12705 case Intrinsic::arm_neon_vqshiftsu:
12706 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
12707 break;
12708 llvm_unreachable("invalid shift count for vqshlu intrinsic");
12709
12710 case Intrinsic::arm_neon_vrshiftn:
12711 case Intrinsic::arm_neon_vqshiftns:
12712 case Intrinsic::arm_neon_vqshiftnu:
12713 case Intrinsic::arm_neon_vqshiftnsu:
12714 case Intrinsic::arm_neon_vqrshiftns:
12715 case Intrinsic::arm_neon_vqrshiftnu:
12716 case Intrinsic::arm_neon_vqrshiftnsu:
12717 // Narrowing shifts require an immediate right shift.
12718 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
12719 break;
12720 llvm_unreachable("invalid shift count for narrowing vector shift "
12721 "intrinsic");
12722
12723 default:
12724 llvm_unreachable("unhandled vector shift");
12725 }
12726
12727 switch (IntNo) {
12728 case Intrinsic::arm_neon_vshifts:
12729 case Intrinsic::arm_neon_vshiftu:
12730 // Opcode already set above.
12731 break;
12732 case Intrinsic::arm_neon_vrshifts:
12733 VShiftOpc = ARMISD::VRSHRsIMM;
12734 break;
12735 case Intrinsic::arm_neon_vrshiftu:
12736 VShiftOpc = ARMISD::VRSHRuIMM;
12737 break;
12738 case Intrinsic::arm_neon_vrshiftn:
12739 VShiftOpc = ARMISD::VRSHRNIMM;
12740 break;
12741 case Intrinsic::arm_neon_vqshifts:
12742 VShiftOpc = ARMISD::VQSHLsIMM;
12743 break;
12744 case Intrinsic::arm_neon_vqshiftu:
12745 VShiftOpc = ARMISD::VQSHLuIMM;
12746 break;
12747 case Intrinsic::arm_neon_vqshiftsu:
12748 VShiftOpc = ARMISD::VQSHLsuIMM;
12749 break;
12750 case Intrinsic::arm_neon_vqshiftns:
12751 VShiftOpc = ARMISD::VQSHRNsIMM;
12752 break;
12753 case Intrinsic::arm_neon_vqshiftnu:
12754 VShiftOpc = ARMISD::VQSHRNuIMM;
12755 break;
12756 case Intrinsic::arm_neon_vqshiftnsu:
12757 VShiftOpc = ARMISD::VQSHRNsuIMM;
12758 break;
12759 case Intrinsic::arm_neon_vqrshiftns:
12760 VShiftOpc = ARMISD::VQRSHRNsIMM;
12761 break;
12762 case Intrinsic::arm_neon_vqrshiftnu:
12763 VShiftOpc = ARMISD::VQRSHRNuIMM;
12764 break;
12765 case Intrinsic::arm_neon_vqrshiftnsu:
12766 VShiftOpc = ARMISD::VQRSHRNsuIMM;
12767 break;
12768 }
12769
12770 SDLoc dl(N);
12771 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
12772 N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
12773 }
12774
12775 case Intrinsic::arm_neon_vshiftins: {
12776 EVT VT = N->getOperand(1).getValueType();
12777 int64_t Cnt;
12778 unsigned VShiftOpc = 0;
12779
12780 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
12781 VShiftOpc = ARMISD::VSLIIMM;
12782 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
12783 VShiftOpc = ARMISD::VSRIIMM;
12784 else {
12785 llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
12786 }
12787
12788 SDLoc dl(N);
12789 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
12790 N->getOperand(1), N->getOperand(2),
12791 DAG.getConstant(Cnt, dl, MVT::i32));
12792 }
12793
12794 case Intrinsic::arm_neon_vqrshifts:
12795 case Intrinsic::arm_neon_vqrshiftu:
12796 // No immediate versions of these to check for.
12797 break;
12798 }
12799
12800 return SDValue();
12801}
12802
12803/// PerformShiftCombine - Checks for immediate versions of vector shifts and
12804/// lowers them. As with the vector shift intrinsics, this is done during DAG
12805/// combining instead of DAG legalizing because the build_vectors for 64-bit
12806/// vector element shift counts are generally not legal, and it is hard to see
12807/// their values after they get legalized to loads from a constant pool.
12808 static SDValue PerformShiftCombine(SDNode *N,
12809 TargetLowering::DAGCombinerInfo &DCI,
12810 const ARMSubtarget *ST) {
12811 SelectionDAG &DAG = DCI.DAG;
12812 EVT VT = N->getValueType(0);
12813 if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
12814 // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
12815 // 16 bits of x are zero. This optimizes rev + lsr 16 to rev16.
12816 SDValue N1 = N->getOperand(1);
12817 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
12818 SDValue N0 = N->getOperand(0);
12819 if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
12820 DAG.MaskedValueIsZero(N0.getOperand(0),
12821 APInt::getHighBitsSet(32, 16)))
12822 return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);
12823 }
12824 }
12825
12826 if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
12827 N->getOperand(0)->getOpcode() == ISD::AND &&
12828 N->getOperand(0)->hasOneUse()) {
12829 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
12830 return SDValue();
12831 // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
12832 // usually show up because instcombine prefers to canonicalize it to
12833 // (and (shl x, ShiftAmt) (shl AndMask, ShiftAmt)), but the shift can come
12834 // out of GEP lowering in some cases.
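// Illustrative example (values assumed): (shl (and x, 0x3ff), 2) has
// AndMask == 0x3ff (MaskedBits == 22) and ShiftAmt == 2, and is rewritten
// below to (srl (shl x, 22), 20), which avoids materializing the mask
// constant on Thumb1.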
12835 SDValue N0 = N->getOperand(0);
12836 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
12837 if (!ShiftAmtNode)
12838 return SDValue();
12839 uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
12840 ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12841 if (!AndMaskNode)
12842 return SDValue();
12843 uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
12844 // Don't transform uxtb/uxth.
12845 if (AndMask == 255 || AndMask == 65535)
12846 return SDValue();
12847 if (isMask_32(AndMask)) {
12848 uint32_t MaskedBits = countLeadingZeros(AndMask);
12849 if (MaskedBits > ShiftAmt) {
12850 SDLoc DL(N);
12851 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
12852 DAG.getConstant(MaskedBits, DL, MVT::i32));
12853 return DAG.getNode(
12854 ISD::SRL, DL, MVT::i32, SHL,
12855 DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
12856 }
12857 }
12858 }
12859
12860 // Nothing to be done for scalar shifts.
12861 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12862 if (!VT.isVector() || !TLI.isTypeLegal(VT))
12863 return SDValue();
12864 if (ST->hasMVEIntegerOps() && VT == MVT::v2i64)
12865 return SDValue();
12866
12867 int64_t Cnt;
12868
12869 switch (N->getOpcode()) {
12870 default: llvm_unreachable("unexpected shift opcode");
12871
12872 case ISD::SHL:
12873 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
12874 SDLoc dl(N);
12875 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
12876 DAG.getConstant(Cnt, dl, MVT::i32));
12877 }
12878 break;
12879
12880 case ISD::SRA:
12881 case ISD::SRL:
12882 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
12883 unsigned VShiftOpc =
12884 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
12885 SDLoc dl(N);
12886 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
12887 DAG.getConstant(Cnt, dl, MVT::i32));
12888 }
12889 }
12890 return SDValue();
12891}
12892
12893/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
12894/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
12895 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
12896 const ARMSubtarget *ST) {
12897 SDValue N0 = N->getOperand(0);
12898
12899 // Check for sign- and zero-extensions of vector extract operations of 8-
12900 // and 16-bit vector elements. NEON supports these directly. They are
12901 // handled during DAG combining because type legalization will promote them
12902 // to 32-bit types and it is messy to recognize the operations after that.
12903 if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
12904 SDValue Vec = N0.getOperand(0);
12905 SDValue Lane = N0.getOperand(1);
12906 EVT VT = N->getValueType(0);
12907 EVT EltVT = N0.getValueType();
12908 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12909
12910 if (VT == MVT::i32 &&
12911 (EltVT == MVT::i8 || EltVT == MVT::i16) &&
12912 TLI.isTypeLegal(Vec.getValueType()) &&
12913 isa<ConstantSDNode>(Lane)) {
12914
12915 unsigned Opc = 0;
12916 switch (N->getOpcode()) {
12917 default: llvm_unreachable("unexpected opcode");
12918 case ISD::SIGN_EXTEND:
12919 Opc = ARMISD::VGETLANEs;
12920 break;
12921 case ISD::ZERO_EXTEND:
12922 case ISD::ANY_EXTEND:
12923 Opc = ARMISD::VGETLANEu;
12924 break;
12925 }
12926 return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
12927 }
12928 }
12929
12930 return SDValue();
12931}
12932
12933 static const APInt *isPowerOf2Constant(SDValue V) {
12934 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
12935 if (!C)
12936 return nullptr;
12937 const APInt *CV = &C->getAPIntValue();
12938 return CV->isPowerOf2() ? CV : nullptr;
12939}
12940
12941 SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
12942 // If we have a CMOV, OR and AND combination such as:
12943 // if (x & CN)
12944 // y |= CM;
12945 //
12946 // And:
12947 // * CN is a single bit;
12948 // * All bits covered by CM are known zero in y
12949 //
12950 // Then we can convert this into a sequence of BFI instructions. This will
12951 // always be a win if CM is a single bit, will always be no worse than the
12952 // TST&OR sequence if CM is two bits, and for Thumb will be no worse if CM is
12953 // three bits (due to the extra IT instruction).
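// Illustrative example (values assumed): with CN == 4 (bit 2) and
// CM == 0x30 (bits 4 and 5), and bits 4-5 of y known zero,
//   if (x & 4) y |= 0x30;
// becomes a "lsr #2" of x followed by two BFIs that copy that bit into bits
// 4 and 5 of y, with no conditional branch or IT block.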
12954
12955 SDValue Op0 = CMOV->getOperand(0);
12956 SDValue Op1 = CMOV->getOperand(1);
12957 auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2));
12958 auto CC = CCNode->getAPIntValue().getLimitedValue();
12959 SDValue CmpZ = CMOV->getOperand(4);
12960
12961 // The compare must be against zero.
12962 if (!isNullConstant(CmpZ->getOperand(1)))
12963 return SDValue();
12964
12965 assert(CmpZ->getOpcode() == ARMISD::CMPZ);
12966 SDValue And = CmpZ->getOperand(0);
12967 if (And->getOpcode() != ISD::AND)
12968 return SDValue();
12969 const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
12970 if (!AndC)
12971 return SDValue();
12972 SDValue X = And->getOperand(0);
12973
12974 if (CC == ARMCC::EQ) {
12975 // We're performing an "equal to zero" compare. Swap the operands so we
12976 // canonicalize on a "not equal to zero" compare.
12977 std::swap(Op0, Op1);
12978 } else {
12979 assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
12980 }
12981
12982 if (Op1->getOpcode() != ISD::OR)
12983 return SDValue();
12984
12985 ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
12986 if (!OrC)
12987 return SDValue();
12988 SDValue Y = Op1->getOperand(0);
12989
12990 if (Op0 != Y)
12991 return SDValue();
12992
12993 // Now, is it profitable to continue?
12994 APInt OrCI = OrC->getAPIntValue();
12995 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
12996 if (OrCI.countPopulation() > Heuristic)
12997 return SDValue();
12998
12999 // Lastly, can we determine that the bits defined by OrCI
13000 // are zero in Y?
13001 KnownBits Known = DAG.computeKnownBits(Y);
13002 if ((OrCI & Known.Zero) != OrCI)
13003 return SDValue();
13004
13005 // OK, we can do the combine.
13006 SDValue V = Y;
13007 SDLoc dl(X);
13008 EVT VT = X.getValueType();
13009 unsigned BitInX = AndC->logBase2();
13010
13011 if (BitInX != 0) {
13012 // We must shift X first.
13013 X = DAG.getNode(ISD::SRL, dl, VT, X,
13014 DAG.getConstant(BitInX, dl, VT));
13015 }
13016
13017 for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
13018 BitInY < NumActiveBits; ++BitInY) {
13019 if (OrCI[BitInY] == 0)
13020 continue;
13021 APInt Mask(VT.getSizeInBits(), 0);
13022 Mask.setBit(BitInY);
13023 V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
13024 // Confusingly, the operand is an *inverted* mask.
13025 DAG.getConstant(~Mask, dl, VT));
13026 }
13027
13028 return V;
13029}
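// Worked example (illustrative): with CN = 0x4 (a single bit) and
// CM = 0x30 (both bits known zero in y),
//   if (x & 0x4) y |= 0x30;
// becomes, after the combine above,
//   lsr r0, r0, #2      @ move the tested bit down to bit 0
//   bfi r1, r0, #4, #1  @ insert it at bit 4 of y
//   bfi r1, r0, #5, #1  @ insert it at bit 5 of y
// i.e. one BFI per set bit in CM, with no compare or branch.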
13030
13031static SDValue PerformHWLoopCombine(SDNode *N,
13032 TargetLowering::DAGCombinerInfo &DCI,
13033 const ARMSubtarget *ST) {
13034 // Look for (brcond (xor test.set.loop.iterations, -1))
13035 SDValue CC = N->getOperand(1);
13036 unsigned Opc = CC->getOpcode();
13037 SDValue Int;
13038
13039 if ((Opc == ISD::XOR || Opc == ISD::SETCC) &&
13040 (CC->getOperand(0)->getOpcode() == ISD::INTRINSIC_W_CHAIN)) {
13041
13042 assert((isa<ConstantSDNode>(CC->getOperand(1)) &&
13043 cast<ConstantSDNode>(CC->getOperand(1))->isOne()) &&
13044 "Expected to compare against 1");
13045
13046 Int = CC->getOperand(0);
13047 } else if (CC->getOpcode() == ISD::INTRINSIC_W_CHAIN)
13048 Int = CC;
13049 else
13050 return SDValue();
13051
13052 unsigned IntOp = cast<ConstantSDNode>(Int.getOperand(1))->getZExtValue();
13053 if (IntOp != Intrinsic::test_set_loop_iterations)
13054 return SDValue();
13055
13056 SDLoc dl(Int);
13057 SDValue Chain = N->getOperand(0);
13058 SDValue Elements = Int.getOperand(2);
13059 SDValue ExitBlock = N->getOperand(2);
13060
13061 // TODO: Once we start supporting tail predication, we can add another
13062 // operand to WLS for the number of elements processed in a vector loop.
13063
13064 SDValue Ops[] = { Chain, Elements, ExitBlock };
13065 SDValue Res = DCI.DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
13066 DCI.DAG.ReplaceAllUsesOfValueWith(Int.getValue(1), Int.getOperand(0));
13067 return Res;
13068}
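// Rough sketch of the rewrite above (illustrative): a preheader ending in
//   (brcond (xor (llvm.test.set.loop.iterations %n), 1), %exit)
// collapses into the single hardware-loop node (WLS chain, %n, %exit),
// a "while-loop start" that both primes the loop counter and branches to
// %exit when %n is zero.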
13069
13070/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
13071SDValue
13072ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
13073 SDValue Cmp = N->getOperand(4);
13074 if (Cmp.getOpcode() != ARMISD::CMPZ)
13075 // Only looking at NE cases.
13076 return SDValue();
13077
13078 EVT VT = N->getValueType(0);
13079 SDLoc dl(N);
13080 SDValue LHS = Cmp.getOperand(0);
13081 SDValue RHS = Cmp.getOperand(1);
13082 SDValue Chain = N->getOperand(0);
13083 SDValue BB = N->getOperand(1);
13084 SDValue ARMcc = N->getOperand(2);
13085 ARMCC::CondCodes CC =
13086 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
13087
13088 // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
13089 // -> (brcond Chain BB CC CPSR Cmp)
13090 if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
13091 LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
13092 LHS->getOperand(0)->hasOneUse()) {
13093 auto *LHS00C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(0));
13094 auto *LHS01C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(1));
13095 auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
13096 auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
13097 if ((LHS00C && LHS00C->getZExtValue() == 0) &&
13098 (LHS01C && LHS01C->getZExtValue() == 1) &&
13099 (LHS1C && LHS1C->getZExtValue() == 1) &&
13100 (RHSC && RHSC->getZExtValue() == 0)) {
13101 return DAG.getNode(
13102 ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
13103 LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
13104 }
13105 }
13106
13107 return SDValue();
13108}
13109
13110/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
13111SDValue
13112ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
13113 SDValue Cmp = N->getOperand(4);
13114 if (Cmp.getOpcode() != ARMISD::CMPZ)
13115 // Only looking at EQ and NE cases.
13116 return SDValue();
13117
13118 EVT VT = N->getValueType(0);
13119 SDLoc dl(N);
13120 SDValue LHS = Cmp.getOperand(0);
13121 SDValue RHS = Cmp.getOperand(1);
13122 SDValue FalseVal = N->getOperand(0);
13123 SDValue TrueVal = N->getOperand(1);
13124 SDValue ARMcc = N->getOperand(2);
13125 ARMCC::CondCodes CC =
13126 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
13127
13128 // BFI is only available on V6T2+.
13129 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
13130 SDValue R = PerformCMOVToBFICombine(N, DAG);
13131 if (R)
13132 return R;
13133 }
13134
13135 // Simplify
13136 // mov r1, r0
13137 // cmp r1, x
13138 // mov r0, y
13139 // moveq r0, x
13140 // to
13141 // cmp r0, x
13142 // movne r0, y
13143 //
13144 // mov r1, r0
13145 // cmp r1, x
13146 // mov r0, x
13147 // movne r0, y
13148 // to
13149 // cmp r0, x
13150 // movne r0, y
13151 // FIXME: Turn this into a target neutral optimization?
13152 SDValue Res;
13153 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
13154 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
13155 N->getOperand(3), Cmp);
13156 } else if (CC == ARMCC::EQ && TrueVal == RHS) {
13157 SDValue ARMcc;
13158 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
13159 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
13160 N->getOperand(3), NewCmp);
13161 }
13162
13163 // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
13164 // -> (cmov F T CC CPSR Cmp)
13165 if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse()) {
13166 auto *LHS0C = dyn_cast<ConstantSDNode>(LHS->getOperand(0));
13167 auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
13168 auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
13169 if ((LHS0C && LHS0C->getZExtValue() == 0) &&
13170 (LHS1C && LHS1C->getZExtValue() == 1) &&
13171 (RHSC && RHSC->getZExtValue() == 0)) {
13172 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
13173 LHS->getOperand(2), LHS->getOperand(3),
13174 LHS->getOperand(4));
13175 }
13176 }
13177
13178 if (!VT.isInteger())
13179 return SDValue();
13180
13181 // Materialize a boolean comparison for integers so we can avoid branching.
13182 if (isNullConstant(FalseVal)) {
13183 if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
13184 if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
13185 // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it
13186 // right 5 bits will make that 32 be 1, otherwise it will be 0.
13187 // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
13188 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
13189 Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),
13190 DAG.getConstant(5, dl, MVT::i32));
13191 } else {
13192 // CMOV 0, 1, ==, (CMPZ x, y) ->
13193 // (ADDCARRY (SUB x, y), t:0, t:1)
13194 // where t = (SUBCARRY 0, (SUB x, y), 0)
13195 //
13196 // The SUBCARRY computes 0 - (x - y) and this will give a borrow when
13197 // x != y. In other words, a carry C == 1 when x == y, C == 0
13198 // otherwise.
13199 // The final ADDCARRY computes
13200 // x - y + (0 - (x - y)) + C == C
13201 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
13202 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
13203 SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
13204 // ISD::SUBCARRY returns a borrow; what we actually want here is
13205 // the carry.
13206 SDValue Carry =
13207 DAG.getNode(ISD::SUB, dl, MVT::i32,
13208 DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
13209 Res = DAG.getNode(ISD::ADDCARRY, dl, VTs, Sub, Neg, Carry);
13210 }
13211 } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
13212 (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
13213 // This seems pointless but will allow us to combine it further below.
13214 // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
13215 SDValue Sub =
13216 DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
13217 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
13218 Sub.getValue(1), SDValue());
13219 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
13220 N->getOperand(3), CPSRGlue.getValue(1));
13221 FalseVal = Sub;
13222 }
13223 } else if (isNullConstant(TrueVal)) {
13224 if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
13225 (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
13226 // This seems pointless but will allow us to combine it further below
13227 // Note that we change == for != as this is the dual for the case above.
13228 // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
13229 SDValue Sub =
13230 DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
13231 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
13232 Sub.getValue(1), SDValue());
13233 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
13234 DAG.getConstant(ARMCC::NE, dl, MVT::i32),
13235 N->getOperand(3), CPSRGlue.getValue(1));
13236 FalseVal = Sub;
13237 }
13238 }
13239
13240 // On Thumb1, the DAG above may be further combined if z is a power of 2
13241 // (z == 2^K).
13242 // CMOV (SUBS x, y), z, !=, (SUBS x, y):1 ->
13243 // t1 = (USUBO (SUB x, y), 1)
13244 // t2 = (SUBCARRY (SUB x, y), t1:0, t1:1)
13245 // Result = if K != 0 then (SHL t2:0, K) else t2:0
13246 //
13247 // This also handles the special case of comparing against zero; it's
13248 // essentially the same pattern, except there's no SUBS:
13249 // CMOV x, z, !=, (CMPZ x, 0) ->
13250 // t1 = (USUBO x, 1)
13251 // t2 = (SUBCARRY x, t1:0, t1:1)
13252 // Result = if K != 0 then (SHL t2:0, K) else t2:0
13253 const APInt *TrueConst;
13254 if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
13255 ((FalseVal.getOpcode() == ARMISD::SUBS &&
13256 FalseVal.getOperand(0) == LHS && FalseVal.getOperand(1) == RHS) ||
13257 (FalseVal == LHS && isNullConstant(RHS))) &&
13258 (TrueConst = isPowerOf2Constant(TrueVal))) {
13259 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
13260 unsigned ShiftAmount = TrueConst->logBase2();
13261 if (ShiftAmount)
13262 TrueVal = DAG.getConstant(1, dl, VT);
13263 SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
13264 Res = DAG.getNode(ISD::SUBCARRY, dl, VTs, FalseVal, Subc, Subc.getValue(1));
13265
13266 if (ShiftAmount)
13267 Res = DAG.getNode(ISD::SHL, dl, VT, Res,
13268 DAG.getConstant(ShiftAmount, dl, MVT::i32));
13269 }
13270
13271 if (Res.getNode()) {
13272 KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
13273 // Capture demanded bits information that would be otherwise lost.
13274 if (Known.Zero == 0xfffffffe)
13275 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
13276 DAG.getValueType(MVT::i1));
13277 else if (Known.Zero == 0xffffff00)
13278 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
13279 DAG.getValueType(MVT::i8));
13280 else if (Known.Zero == 0xffff0000)
13281 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
13282 DAG.getValueType(MVT::i16));
13283 }
13284
13285 return Res;
13286}
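// Arithmetic behind the combines above (illustrative): if x == y then
// x - y == 0, CLZ(0) == 32 == 0b100000, and 32 >> 5 == 1; for x != y,
// CLZ(x - y) <= 31, so the shifted result is 0. The carry variant uses
// (x - y) + (0 - (x - y)) + C == C, where 0 - (x - y) borrows exactly
// when x != y, so C = 1 - borrow is 1 precisely when x == y.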
13287
13288SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
13289 DAGCombinerInfo &DCI) const {
13290 switch (N->getOpcode()) {
13291 default: break;
13292 case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
13293 case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
13294 case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
13295 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
13296 case ISD::SUB: return PerformSUBCombine(N, DCI);
13297 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
13298 case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
13299 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
13300 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
13301 case ISD::BRCOND: return PerformHWLoopCombine(N, DCI, Subtarget);
13302 case ARMISD::ADDC:
13303 case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
13304 case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
13305 case ARMISD::BFI: return PerformBFICombine(N, DCI);
13306 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
13307 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
13308 case ISD::STORE: return PerformSTORECombine(N, DCI);
13309 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
13310 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
13311 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
13312 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
13313 case ARMISD::VDUP: return PerformVDUPCombine(N, DCI, Subtarget);
13314 case ISD::FP_TO_SINT:
13315 case ISD::FP_TO_UINT:
13316 return PerformVCVTCombine(N, DCI.DAG, Subtarget);
13317 case ISD::FDIV:
13318 return PerformVDIVCombine(N, DCI.DAG, Subtarget);
13319 case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
13320 case ISD::SHL:
13321 case ISD::SRA:
13322 case ISD::SRL:
13323 return PerformShiftCombine(N, DCI, Subtarget);
13324 case ISD::SIGN_EXTEND:
13325 case ISD::ZERO_EXTEND:
13326 case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
13327 case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
13328 case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG);
13329 case ISD::LOAD: return PerformLOADCombine(N, DCI);
13330 case ARMISD::VLD1DUP:
13331 case ARMISD::VLD2DUP:
13332 case ARMISD::VLD3DUP:
13333 case ARMISD::VLD4DUP:
13334 return PerformVLDCombine(N, DCI);
13335 case ARMISD::BUILD_VECTOR:
13336 return PerformARMBUILD_VECTORCombine(N, DCI);
13337 case ARMISD::SMULWB: {
13338 unsigned BitWidth = N->getValueType(0).getSizeInBits();
13339 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
13340 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
13341 return SDValue();
13342 break;
13343 }
13344 case ARMISD::SMULWT: {
13345 unsigned BitWidth = N->getValueType(0).getSizeInBits();
13346 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
13347 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
13348 return SDValue();
13349 break;
13350 }
13351 case ARMISD::SMLALBB: {
13352 unsigned BitWidth = N->getValueType(0).getSizeInBits();
13353 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
13354 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
13355 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
13356 return SDValue();
13357 break;
13358 }
13359 case ARMISD::SMLALBT: {
13360 unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
13361 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
13362 unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
13363 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
13364 if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
13365 (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
13366 return SDValue();
13367 break;
13368 }
13369 case ARMISD::SMLALTB: {
13370 unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
13371 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
13372 unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
13373 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
13374 if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
13375 (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
13376 return SDValue();
13377 break;
13378 }
13379 case ARMISD::SMLALTT: {
13380 unsigned BitWidth = N->getValueType(0).getSizeInBits();
13381 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
13382 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
13383 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
13384 return SDValue();
13385 break;
13386 }
13387 case ISD::INTRINSIC_VOID:
13388 case ISD::INTRINSIC_W_CHAIN:
13389 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13390 case Intrinsic::arm_neon_vld1:
13391 case Intrinsic::arm_neon_vld1x2:
13392 case Intrinsic::arm_neon_vld1x3:
13393 case Intrinsic::arm_neon_vld1x4:
13394 case Intrinsic::arm_neon_vld2:
13395 case Intrinsic::arm_neon_vld3:
13396 case Intrinsic::arm_neon_vld4:
13397 case Intrinsic::arm_neon_vld2lane:
13398 case Intrinsic::arm_neon_vld3lane:
13399 case Intrinsic::arm_neon_vld4lane:
13400 case Intrinsic::arm_neon_vld2dup:
13401 case Intrinsic::arm_neon_vld3dup:
13402 case Intrinsic::arm_neon_vld4dup:
13403 case Intrinsic::arm_neon_vst1:
13404 case Intrinsic::arm_neon_vst1x2:
13405 case Intrinsic::arm_neon_vst1x3:
13406 case Intrinsic::arm_neon_vst1x4:
13407 case Intrinsic::arm_neon_vst2:
13408 case Intrinsic::arm_neon_vst3:
13409 case Intrinsic::arm_neon_vst4:
13410 case Intrinsic::arm_neon_vst2lane:
13411 case Intrinsic::arm_neon_vst3lane:
13412 case Intrinsic::arm_neon_vst4lane:
13413 return PerformVLDCombine(N, DCI);
13414 default: break;
13415 }
13416 break;
13417 }
13418 return SDValue();
13419}
13420
13421bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
13422 EVT VT) const {
13423 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
13424}
13425
13426bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
13427 unsigned Alignment,
13428 MachineMemOperand::Flags,
13429 bool *Fast) const {
13430 // Depends what it gets converted into if the type is weird.
13431 if (!VT.isSimple())
13432 return false;
13433
13434 // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs.
13435 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
13436 auto Ty = VT.getSimpleVT().SimpleTy;
13437
13438 if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
13439 // Unaligned accesses can use (for example) LDRB, LDRH, LDR.
13440 if (AllowsUnaligned) {
13441 if (Fast)
13442 *Fast = Subtarget->hasV7Ops();
13443 return true;
13444 }
13445 }
13446
13447 if (Ty == MVT::f64 || Ty == MVT::v2f64) {
13448 // For any little-endian targets with neon, we can support unaligned ld/st
13449 // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
13450 // A big-endian target may also explicitly support unaligned accesses
13451 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
13452 if (Fast)
13453 *Fast = true;
13454 return true;
13455 }
13456 }
13457
13458 if (!Subtarget->hasMVEIntegerOps())
13459 return false;
13460 if (Ty != MVT::v16i8 && Ty != MVT::v8i16 && Ty != MVT::v8f16 &&
13461 Ty != MVT::v4i32 && Ty != MVT::v4f32 && Ty != MVT::v2i64 &&
13462 Ty != MVT::v2f64 &&
13463 // These are for truncated stores
13464 Ty != MVT::v4i8 && Ty != MVT::v8i8 && Ty != MVT::v4i16)
13465 return false;
13466
13467 if (Subtarget->isLittle()) {
13468 // In little-endian MVE, the store instructions VSTRB.U8,
13469 // VSTRH.U16 and VSTRW.U32 all store the vector register in
13470 // exactly the same format, and differ only in the range of
13471 // their immediate offset field and the required alignment.
13472 //
13473 // In particular, VSTRB.U8 can store a vector at byte alignment.
13474 // So at this stage we can simply say that loads/stores of all
13475 // 128-bit wide vector types are permitted at any alignment,
13476 // because we know at least _one_ instruction can manage that.
13477 //
13478 // Later on we might find that some of those loads are better
13479 // generated as VLDRW.U32 if alignment permits, to take
13480 // advantage of the larger immediate range. But for the moment,
13481 // all that matters is that if we don't lower the load then
13482 // _some_ instruction can handle it.
13483 if (Fast)
13484 *Fast = true;
13485 return true;
13486 } else {
13487 // In big-endian MVE, those instructions aren't so similar
13488 // after all, because they reorder the bytes of the vector
13489 // differently. So this time we can only store a particular
13490 // kind of vector if its alignment is at least the element
13491 // type. And we can't store vectors of i64 or f64 at all
13492 // without having to do some postprocessing, because there's
13493 // no VSTRD.U64.
13494 if (Ty == MVT::v16i8 ||
13495 ((Ty == MVT::v8i16 || Ty == MVT::v8f16) && Alignment >= 2) ||
13496 ((Ty == MVT::v4i32 || Ty == MVT::v4f32) && Alignment >= 4)) {
13497 if (Fast)
13498 *Fast = true;
13499 return true;
13500 }
13501 }
13502
13503 return false;
13504}
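// Examples of the rules above (illustrative): on little-endian MVE a
// v4i32 access at alignment 1 is accepted, because VLDRB.U8/VSTRB.U8 can
// always handle the 16 bytes; on big-endian MVE the same access needs
// alignment >= 4 (the element size), and v2i64/v2f64 are rejected since
// there is no VSTRD.U64 to preserve the 64-bit lane layout.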
13505
13506static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
13507 unsigned AlignCheck) {
13508 return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
13509 (DstAlign == 0 || DstAlign % AlignCheck == 0));
13510}
13511
13512EVT ARMTargetLowering::getOptimalMemOpType(
13513 uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
13514 bool ZeroMemset, bool MemcpyStrSrc,
13515 const AttributeList &FuncAttributes) const {
13516 // See if we can use NEON instructions for this...
13517 if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() &&
13518 !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
13519 bool Fast;
13520 if (Size >= 16 &&
13521 (memOpAlign(SrcAlign, DstAlign, 16) ||
13522 (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1,
13523 MachineMemOperand::MONone, &Fast) &&
13524 Fast))) {
13525 return MVT::v2f64;
13526 } else if (Size >= 8 &&
13527 (memOpAlign(SrcAlign, DstAlign, 8) ||
13528 (allowsMisalignedMemoryAccesses(
13529 MVT::f64, 0, 1, MachineMemOperand::MONone, &Fast) &&
13530 Fast))) {
13531 return MVT::f64;
13532 }
13533 }
13534
13535 // Let the target-independent logic figure it out.
13536 return MVT::Other;
13537}
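// Example of the selection above (illustrative): a 32-byte memcpy between
// 16-byte-aligned buffers on a NEON target returns MVT::v2f64 and is
// emitted as two 128-bit vector load/store pairs; with only 4-byte
// alignment and unaligned accesses disallowed, it returns MVT::Other and
// the generic lowering falls back to i32 copies.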
13538
13539// 64-bit integers are split into their high and low parts and held in two
13540// different registers, so the trunc is free since the low register can just
13541// be used.
13542bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
13543 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
13544 return false;
13545 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
13546 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
13547 return (SrcBits == 64 && DestBits == 32);
13548}
13549
13550bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
13551 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
13552 !DstVT.isInteger())
13553 return false;
13554 unsigned SrcBits = SrcVT.getSizeInBits();
13555 unsigned DestBits = DstVT.getSizeInBits();
13556 return (SrcBits == 64 && DestBits == 32);
13557}
13558
13559bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
13560 if (Val.getOpcode() != ISD::LOAD)
13561 return false;
13562
13563 EVT VT1 = Val.getValueType();
13564 if (!VT1.isSimple() || !VT1.isInteger() ||
13565 !VT2.isSimple() || !VT2.isInteger())
13566 return false;
13567
13568 switch (VT1.getSimpleVT().SimpleTy) {
13569 default: break;
13570 case MVT::i1:
13571 case MVT::i8:
13572 case MVT::i16:
13573 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
13574 return true;
13575 }
13576
13577 return false;
13578}
13579
13580bool ARMTargetLowering::isFNegFree(EVT VT) const {
13581 if (!VT.isSimple())
13582 return false;
13583
13584 // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that
13585 // negate values directly (fneg is free). So, we don't want to let the DAG
13586 // combiner rewrite fneg into xors and some other instructions. For f16 and
13587 // FullFP16 argument passing, some bitcast nodes may be introduced,
13588 // triggering this DAG combine rewrite, so we are avoiding that with this.
13589 switch (VT.getSimpleVT().SimpleTy) {
13590 default: break;
13591 case MVT::f16:
13592 return Subtarget->hasFullFP16();
13593 }
13594
13595 return false;
13596}
13597
13598/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
13599/// of the vector elements.
13600static bool areExtractExts(Value *Ext1, Value *Ext2) {
13601 auto areExtDoubled = [](Instruction *Ext) {
13602 return Ext->getType()->getScalarSizeInBits() ==
13603 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
13604 };
13605
13606 if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
13607 !match(Ext2, m_ZExtOrSExt(m_Value())) ||
13608 !areExtDoubled(cast<Instruction>(Ext1)) ||
13609 !areExtDoubled(cast<Instruction>(Ext2)))
13610 return false;
13611
13612 return true;
13613}
13614
13615/// Check if sinking \p I's operands to I's basic block is profitable, because
13616/// the operands can be folded into a target instruction, e.g.
13617/// sext/zext can be folded into vsubl.
13618bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
13619 SmallVectorImpl<Use *> &Ops) const {
13620 if (!Subtarget->hasNEON() || !I->getType()->isVectorTy())
13621 return false;
13622
13623 switch (I->getOpcode()) {
13624 case Instruction::Sub:
13625 case Instruction::Add: {
13626 if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
13627 return false;
13628 Ops.push_back(&I->getOperandUse(0));
13629 Ops.push_back(&I->getOperandUse(1));
13630 return true;
13631 }
13632 default:
13633 return false;
13634 }
13635 return false;
13636}
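// Example of the sinking above (illustrative): if two sexts feeding a
// vector sub are defined in another block, moving them next to the sub
// lets instruction selection match the whole pattern as one widening
// NEON instruction, e.g. "vsubl.s16 q0, d0, d1", instead of widening
// each operand separately.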
13637
13638bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
13639 EVT VT = ExtVal.getValueType();
13640
13641 if (!isTypeLegal(VT))
13642 return false;
13643
13644 // Don't create a loadext if we can fold the extension into a wide/long
13645 // instruction.
13646 // If there's more than one user instruction, the loadext is desirable no
13647 // matter what. There can be two uses by the same instruction.
13648 if (ExtVal->use_empty() ||
13649 !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
13650 return true;
13651
13652 SDNode *U = *ExtVal->use_begin();
13653 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
13654 U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
13655 return false;
13656
13657 return true;
13658}
13659
13660bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
13661 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
13662 return false;
13663
13664 if (!isTypeLegal(EVT::getEVT(Ty1)))
13665 return false;
13666
13667 assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
13668
13669 // Assuming the caller doesn't have a zeroext or signext return parameter,
13670 // truncation all the way down to i1 is valid.
13671 return true;
13672}
13673
13674int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
13675 const AddrMode &AM, Type *Ty,
13676 unsigned AS) const {
13677 if (isLegalAddressingMode(DL, AM, Ty, AS)) {
13678 if (Subtarget->hasFPAO())
13679 return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster
13680 return 0;
13681 }
13682 return -1;
13683}
13684
13685static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
13686 if (V < 0)
13687 return false;
13688
13689 unsigned Scale = 1;
13690 switch (VT.getSimpleVT().SimpleTy) {
13691 case MVT::i1:
13692 case MVT::i8:
13693 // Scale == 1;
13694 break;
13695 case MVT::i16:
13696 // Scale == 2;
13697 Scale = 2;
13698 break;
13699 default:
13700 // On thumb1 we load most things (i32, i64, floats, etc) with a LDR
13701 // Scale == 4;
13702 Scale = 4;
13703 break;
13704 }
13705
13706 if ((V & (Scale - 1)) != 0)
13707 return false;
13708 return isUInt<5>(V / Scale);
13709}
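// Examples (illustrative): a Thumb1 i16 load allows offsets that are
// multiples of 2 up to 62 (imm5 * 2), so "ldrh r0, [r1, #62]" encodes but
// #63 or #64 does not; for i32 the limit is multiples of 4 up to 124.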
13710
13711static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
13712 const ARMSubtarget *Subtarget) {
13713 if (!VT.isInteger() && !VT.isFloatingPoint())
13714 return false;
13715 if (VT.isVector() && Subtarget->hasNEON())
13716 return false;
13717 if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
13718 !Subtarget->hasMVEFloatOps())
13719 return false;
13720
13721 bool IsNeg = false;
13722 if (V < 0) {
13723 IsNeg = true;
13724 V = -V;
13725 }
13726
13727 unsigned NumBytes = std::max(VT.getSizeInBits() / 8, 1U);
13728
13729 // MVE: size * imm7
13730 if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
13731 switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {
13732 case MVT::i32:
13733 case MVT::f32:
13734 return isShiftedUInt<7,2>(V);
13735 case MVT::i16:
13736 case MVT::f16:
13737 return isShiftedUInt<7,1>(V);
13738 case MVT::i8:
13739 return isUInt<7>(V);
13740 default:
13741 return false;
13742 }
13743 }
13744
13745 // half VLDR: 2 * imm8
13746 if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
13747 return isShiftedUInt<8, 1>(V);
13748 // VLDR and LDRD: 4 * imm8
13749 if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
13750 return isShiftedUInt<8, 2>(V);
13751
13752 if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
13753 // + imm12 or - imm8
13754 if (IsNeg)
13755 return isUInt<8>(V);
13756 return isUInt<12>(V);
13757 }
13758
13759 return false;
13760}
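// Examples (illustrative): an MVE v4i32 access allows offsets that are
// multiples of 4 with magnitude up to 508 (imm7 << 2); a scalar i32
// allows up to +4095 but only -255 on the negative side (+imm12 vs.
// -imm8); a VLDR of f64 allows multiples of 4 up to 1020.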
13761
13762/// isLegalAddressImmediate - Return true if the integer value can be used
13763/// as the offset of the target addressing mode for load / store of the
13764/// given type.
13765static bool isLegalAddressImmediate(int64_t V, EVT VT,
13766 const ARMSubtarget *Subtarget) {
13767 if (V == 0)
13768 return true;
13769
13770 if (!VT.isSimple())
13771 return false;
13772
13773 if (Subtarget->isThumb1Only())
13774 return isLegalT1AddressImmediate(V, VT);
13775 else if (Subtarget->isThumb2())
13776 return isLegalT2AddressImmediate(V, VT, Subtarget);
13777
13778 // ARM mode.
13779 if (V < 0)
13780 V = - V;
13781 switch (VT.getSimpleVT().SimpleTy) {
13782 default: return false;
13783 case MVT::i1:
13784 case MVT::i8:
13785 case MVT::i32:
13786 // +- imm12
13787 return isUInt<12>(V);
13788 case MVT::i16:
13789 // +- imm8
13790 return isUInt<8>(V);
13791 case MVT::f32:
13792 case MVT::f64:
13793 if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
13794 return false;
13795 return isShiftedUInt<8, 2>(V);
13796 }
13797}
13798
13799bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
13800 EVT VT) const {
13801 int Scale = AM.Scale;
13802 if (Scale < 0)
13803 return false;
13804
13805 switch (VT.getSimpleVT().SimpleTy) {
13806 default: return false;
13807 case MVT::i1:
13808 case MVT::i8:
13809 case MVT::i16:
13810 case MVT::i32:
13811 if (Scale == 1)
13812 return true;
13813 // r + r << imm
13814 Scale = Scale & ~1;
13815 return Scale == 2 || Scale == 4 || Scale == 8;
13816 case MVT::i64:
13817 // FIXME: What are we trying to model here? ldrd doesn't have an r + r
13818 // version in Thumb mode.
13819 // r + r
13820 if (Scale == 1)
13821 return true;
13822 // r * 2 (this can be lowered to r + r).
13823 if (!AM.HasBaseReg && Scale == 2)
13824 return true;
13825 return false;
13826 case MVT::isVoid:
13827 // Note, we allow "void" uses (basically, uses that aren't loads or
13828 // stores), because arm allows folding a scale into many arithmetic
13829 // operations. This should be made more precise and revisited later.
13830
13831 // Allow r << imm, but the imm has to be a multiple of two.
13832 if (Scale & 1) return false;
13833 return isPowerOf2_32(Scale);
13834 }
13835}
13836
13837bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM,
13838 EVT VT) const {
13839 const int Scale = AM.Scale;
13840
13841 // Negative scales are not supported in Thumb1.
13842 if (Scale < 0)
13843 return false;
13844
13845 // Thumb1 addressing modes do not support register scaling excepting the
13846 // following cases:
13847 // 1. Scale == 1 means no scaling.
13848 // 2. Scale == 2 this can be lowered to r + r if there is no base register.
13849 return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
13850}
13851
13852/// isLegalAddressingMode - Return true if the addressing mode represented
13853/// by AM is legal for this target, for a load/store of the specified type.
13854bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
13855 const AddrMode &AM, Type *Ty,
13856 unsigned AS, Instruction *I) const {
13857 EVT VT = getValueType(DL, Ty, true);
13858 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
13859 return false;
13860
13861 // Can never fold addr of global into load/store.
13862 if (AM.BaseGV)
13863 return false;
13864
13865 switch (AM.Scale) {
13866 case 0: // no scale reg, must be "r+i" or "r", or "i".
13867 break;
13868 default:
13869 // ARM doesn't support any R+R*scale+imm addr modes.
13870 if (AM.BaseOffs)
13871 return false;
13872
13873 if (!VT.isSimple())
13874 return false;
13875
13876 if (Subtarget->isThumb1Only())
13877 return isLegalT1ScaledAddressingMode(AM, VT);
13878
13879 if (Subtarget->isThumb2())
13880 return isLegalT2ScaledAddressingMode(AM, VT);
13881
13882 int Scale = AM.Scale;
13883 switch (VT.getSimpleVT().SimpleTy) {
13884 default: return false;
13885 case MVT::i1:
13886 case MVT::i8:
13887 case MVT::i32:
13888 if (Scale < 0) Scale = -Scale;
13889 if (Scale == 1)
13890 return true;
13891 // r + r << imm
13892 return isPowerOf2_32(Scale & ~1);
13893 case MVT::i16:
13894 case MVT::i64:
13895 // r +/- r
13896 if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
13897 return true;
13898 // r * 2 (this can be lowered to r + r).
13899 if (!AM.HasBaseReg && Scale == 2)
13900 return true;
13901 return false;
13902
13903 case MVT::isVoid:
13904 // Note, we allow "void" uses (basically, uses that aren't loads or
13905 // stores), because arm allows folding a scale into many arithmetic
13906 // operations. This should be made more precise and revisited later.
13907
13908 // Allow r << imm, but the imm has to be a multiple of two.
13909 if (Scale & 1) return false;
13910 return isPowerOf2_32(Scale);
13911 }
13912 }
13913 return true;
13914}
13915
13916/// isLegalICmpImmediate - Return true if the specified immediate is legal
13917/// icmp immediate, that is the target has icmp instructions which can compare
13918/// a register against the immediate without having to materialize the
13919/// immediate into a register.
13920bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
13921 // Thumb2 and ARM modes can use cmn for negative immediates.
13922 if (!Subtarget->isThumb())
13923 return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
13924 ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
13925 if (Subtarget->isThumb2())
13926 return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
13927 ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
13928 // Thumb1 doesn't have cmn, and only 8-bit immediates.
13929 return Imm >= 0 && Imm <= 255;
13930}
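// Example (illustrative): "icmp eq i32 %x, -42" is legal on ARM/Thumb2
// because the negated value encodes, so codegen can emit "cmn r0, #42"
// rather than materializing -42 into a register first.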
13931
13932/// isLegalAddImmediate - Return true if the specified immediate is a legal add
13933/// *or sub* immediate, that is the target has add or sub instructions which can
13934/// add a register with the immediate without having to materialize the
13935/// immediate into a register.
13936bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
13937 // Same encoding for add/sub, just flip the sign.
13938 int64_t AbsImm = std::abs(Imm);
13939 if (!Subtarget->isThumb())
13940 return ARM_AM::getSOImmVal(AbsImm) != -1;
13941 if (Subtarget->isThumb2())
13942 return ARM_AM::getT2SOImmVal(AbsImm) != -1;
13943 // Thumb1 only has 8-bit unsigned immediate.
13944 return AbsImm >= 0 && AbsImm <= 255;
13945}
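// Example (illustrative): "add i32 %x, -1000" is accepted because 1000
// encodes as a modified immediate (0xFA ror 30), so ISel can emit
// "sub r0, r0, #1000" instead of loading the constant separately.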
13946
13947static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
13948 bool isSEXTLoad, SDValue &Base,
13949 SDValue &Offset, bool &isInc,
13950 SelectionDAG &DAG) {
13951 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
13952 return false;
13953
13954 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
13955 // AddressingMode 3
13956 Base = Ptr->getOperand(0);
13957 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
13958 int RHSC = (int)RHS->getZExtValue();
13959 if (RHSC < 0 && RHSC > -256) {
13960 assert(Ptr->getOpcode() == ISD::ADD);
13961 isInc = false;
13962 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
13963 return true;
13964 }
13965 }
13966 isInc = (Ptr->getOpcode() == ISD::ADD);
13967 Offset = Ptr->getOperand(1);
13968 return true;
13969 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
13970 // AddressingMode 2
13971 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
13972 int RHSC = (int)RHS->getZExtValue();
13973 if (RHSC < 0 && RHSC > -0x1000) {
13974 assert(Ptr->getOpcode() == ISD::ADD);
13975 isInc = false;
13976 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
13977 Base = Ptr->getOperand(0);
13978 return true;
13979 }
13980 }
13981
13982 if (Ptr->getOpcode() == ISD::ADD) {
13983 isInc = true;
13984 ARM_AM::ShiftOpc ShOpcVal=
13985 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
13986 if (ShOpcVal != ARM_AM::no_shift) {
13987 Base = Ptr->getOperand(1);
13988 Offset = Ptr->getOperand(0);
13989 } else {
13990 Base = Ptr->getOperand(0);
13991 Offset = Ptr->getOperand(1);
13992 }
13993 return true;
13994 }
13995
13996 isInc = (Ptr->getOpcode() == ISD::ADD);
13997 Base = Ptr->getOperand(0);
13998 Offset = Ptr->getOperand(1);
13999 return true;
14000 }
14001
14002 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
14003 return false;
14004}
14005
14006static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
14007 bool isSEXTLoad, SDValue &Base,
14008 SDValue &Offset, bool &isInc,
14009 SelectionDAG &DAG) {
14010 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
14011 return false;
14012
14013 Base = Ptr->getOperand(0);
14014 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
14015 int RHSC = (int)RHS->getZExtValue();
14016 if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
14017 assert(Ptr->getOpcode() == ISD::ADD);
14018 isInc = false;
14019 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
14020 return true;
14021 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
14022 isInc = Ptr->getOpcode() == ISD::ADD;
14023 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
14024 return true;
14025 }
14026 }
14027
14028 return false;
14029}
14030
14031/// getPreIndexedAddressParts - returns true by value, base pointer and
14032/// offset pointer and addressing mode by reference if the node's address
14033/// can be legally represented as pre-indexed load / store address.
14034bool
14035ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
14036 SDValue &Offset,
14037 ISD::MemIndexedMode &AM,
14038 SelectionDAG &DAG) const {
14039 if (Subtarget->isThumb1Only())
14040 return false;
14041
14042 EVT VT;
14043 SDValue Ptr;
14044 bool isSEXTLoad = false;
14045 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
14046 Ptr = LD->getBasePtr();
14047 VT = LD->getMemoryVT();
14048 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
14049 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
14050 Ptr = ST->getBasePtr();
14051 VT = ST->getMemoryVT();
14052 } else
14053 return false;
14054
14055 bool isInc;
14056 bool isLegal = false;
14057 if (Subtarget->isThumb2())
14058 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
14059 Offset, isInc, DAG);
14060 else
14061 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
14062 Offset, isInc, DAG);
14063 if (!isLegal)
14064 return false;
14065
14066 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
14067 return true;
14068}
14069
14070/// getPostIndexedAddressParts - returns true by value, base pointer and
14071/// offset pointer and addressing mode by reference if this node can be
14072/// combined with a load / store to form a post-indexed load / store.
14073bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
14074 SDValue &Base,
14075 SDValue &Offset,
14076 ISD::MemIndexedMode &AM,
14077 SelectionDAG &DAG) const {
14078 EVT VT;
14079 SDValue Ptr;
14080 bool isSEXTLoad = false, isNonExt;
14081 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
14082 VT = LD->getMemoryVT();
14083 Ptr = LD->getBasePtr();
14084 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
14085 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
14086 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
14087 VT = ST->getMemoryVT();
14088 Ptr = ST->getBasePtr();
14089 isNonExt = !ST->isTruncatingStore();
14090 } else
14091 return false;
14092
14093 if (Subtarget->isThumb1Only()) {
14094 // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
14095 // must be non-extending/truncating, i32, with an offset of 4.
14096 assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
14097 if (Op->getOpcode() != ISD::ADD || !isNonExt)
14098 return false;
14099 auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
14100 if (!RHS || RHS->getZExtValue() != 4)
14101 return false;
14102
14103 Offset = Op->getOperand(1);
14104 Base = Op->getOperand(0);
14105 AM = ISD::POST_INC;
14106 return true;
14107 }
14108
14109 bool isInc;
14110 bool isLegal = false;
14111 if (Subtarget->isThumb2())
14112 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
14113 isInc, DAG);
14114 else
14115 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
14116 isInc, DAG);
14117 if (!isLegal)
14118 return false;
14119
14120 if (Ptr != Base) {
14121 // Swap base ptr and offset to catch more post-index load / store when
14122 // it's legal. In Thumb2 mode, offset must be an immediate.
14123 if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
14124 !Subtarget->isThumb2())
14125 std::swap(Base, Offset);
14126
14127 // Post-indexed load / store update the base pointer.
14128 if (Ptr != Base)
14129 return false;
14130 }
14131
14132 AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
14133 return true;
14134}
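// Example (illustrative): for "int32_t v = *p++;" the load and the
// pointer bump combine into one post-indexed instruction,
// "ldr r0, [r1], #4", which loads through r1 and then writes r1 + 4 back
// to the base register.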
14135
14136void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
14137 KnownBits &Known,
14138 const APInt &DemandedElts,
14139 const SelectionDAG &DAG,
14140 unsigned Depth) const {
14141 unsigned BitWidth = Known.getBitWidth();
14142 Known.resetAll();
14143 switch (Op.getOpcode()) {
14144 default: break;
14145 case ARMISD::ADDC:
14146 case ARMISD::ADDE:
14147 case ARMISD::SUBC:
14148 case ARMISD::SUBE:
14149 // Special cases when we convert a carry to a boolean.
14150 if (Op.getResNo() == 0) {
14151 SDValue LHS = Op.getOperand(0);
14152 SDValue RHS = Op.getOperand(1);
14153 // (ADDE 0, 0, C) will give us a single bit.
14154 if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
14155 isNullConstant(RHS)) {
14156 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
14157 return;
14158 }
14159 }
14160 break;
14161 case ARMISD::CMOV: {
14162 // Bits are known zero/one if known on the LHS and RHS.
14163 Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
14164 if (Known.isUnknown())
14165 return;
14166
14167 KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
14168 Known.Zero &= KnownRHS.Zero;
14169 Known.One &= KnownRHS.One;
14170 return;
14171 }
14172 case ISD::INTRINSIC_W_CHAIN: {
14173 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
14174 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
14175 switch (IntID) {
14176 default: return;
14177 case Intrinsic::arm_ldaex:
14178 case Intrinsic::arm_ldrex: {
14179 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
14180 unsigned MemBits = VT.getScalarSizeInBits();
14181 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
14182 return;
14183 }
14184 }
14185 }
14186 case ARMISD::BFI: {
14187 // Conservatively, we can recurse down the first operand
14188 // and just mask out all affected bits.
14189 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
14190
14191 // The operand to BFI is already a mask suitable for removing the bits it
14192 // sets.
14193 ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
14194 const APInt &Mask = CI->getAPIntValue();
14195 Known.Zero &= Mask;
14196 Known.One &= Mask;
14197 return;
14198 }
14199 case ARMISD::VGETLANEs:
14200 case ARMISD::VGETLANEu: {
14201 const SDValue &SrcSV = Op.getOperand(0);
14202 EVT VecVT = SrcSV.getValueType();
14203 assert(VecVT.isVector() && "VGETLANE expected a vector type");
14204 const unsigned NumSrcElts = VecVT.getVectorNumElements();
14205 ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
14206 assert(Pos->getAPIntValue().ult(NumSrcElts) &&
14207 "VGETLANE index out of bounds");
14208 unsigned Idx = Pos->getZExtValue();
14209 APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
14210 Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
14211
14212 EVT VT = Op.getValueType();
14213 const unsigned DstSz = VT.getScalarSizeInBits();
14214 const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
14215 (void)SrcSz;
14216 assert(SrcSz == Known.getBitWidth());
14217 assert(DstSz > SrcSz);
14218 if (Op.getOpcode() == ARMISD::VGETLANEs)
14219 Known = Known.sext(DstSz);
14220 else {
14221 Known = Known.zext(DstSz, true /* extended bits are known zero */);
14222 }
14223 assert(DstSz == Known.getBitWidth());
14224 break;
14225 }
14226 }
14227}
14228
14229bool
14230ARMTargetLowering::targetShrinkDemandedConstant(SDValue Op,
14231 const APInt &DemandedAPInt,
14232 TargetLoweringOpt &TLO) const {
14233 // Delay optimization, so we don't have to deal with illegal types, or block
14234 // optimizations.
14235 if (!TLO.LegalOps)
14236 return false;
14237
14238 // Only optimize AND for now.
14239 if (Op.getOpcode() != ISD::AND)
14240 return false;
14241
14242 EVT VT = Op.getValueType();
14243
14244 // Ignore vectors.
14245 if (VT.isVector())
14246 return false;
14247
14248 assert(VT == MVT::i32 && "Unexpected integer type");
14249
14250 // Make sure the RHS really is a constant.
14251 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
14252 if (!C)
14253 return false;
14254
14255 unsigned Mask = C->getZExtValue();
14256
14257 unsigned Demanded = DemandedAPInt.getZExtValue();
14258 unsigned ShrunkMask = Mask & Demanded;
14259 unsigned ExpandedMask = Mask | ~Demanded;
14260
14261 // If the mask is all zeros, let the target-independent code replace the
14262 // result with zero.
14263 if (ShrunkMask == 0)
14264 return false;
14265
14266 // If the mask is all ones, erase the AND. (Currently, the target-independent
14267 // code won't do this, so we have to do it explicitly to avoid an infinite
14268 // loop in obscure cases.)
14269 if (ExpandedMask == ~0U)
14270 return TLO.CombineTo(Op, Op.getOperand(0));
14271
14272 auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
14273 return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
14274 };
14275 auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
14276 if (NewMask == Mask)
14277 return true;
14278 SDLoc DL(Op);
14279 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
14280 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
14281 return TLO.CombineTo(Op, NewOp);
14282 };
14283
14284 // Prefer uxtb mask.
14285 if (IsLegalMask(0xFF))
14286 return UseMask(0xFF);
14287
14288 // Prefer uxth mask.
14289 if (IsLegalMask(0xFFFF))
14290 return UseMask(0xFFFF);
14291
14292 // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
14293 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
14294 if (ShrunkMask < 256)
14295 return UseMask(ShrunkMask);
14296
14297 // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
14298 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
14299 if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
14300 return UseMask(ExpandedMask);
14301
14302 // Potential improvements:
14303 //
14304 // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
14305 // We could try to prefer Thumb1 immediates which can be lowered to a
14306 // two-instruction sequence.
14307 // We could try to recognize more legal ARM/Thumb2 immediates here.
14308
14309 return false;
14310}
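// Example (illustrative): for (and %x, 0x1FF) where only the low 8 bits
// are demanded, ShrunkMask is 0xFF, so the constant is narrowed and the
// AND can be selected as a single "uxtb r0, r0"; an expanded mask in
// [-256, -2], such as 0xFFFFFF00, is kept instead, which Thumb1 can
// lower to movs + bics.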
14311
14312
14313//===----------------------------------------------------------------------===//
14314// ARM Inline Assembly Support
14315//===----------------------------------------------------------------------===//
14316
14317bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
14318 // Looking for "rev" which is V6+.
14319 if (!Subtarget->hasV6Ops())
14320 return false;
14321
14322 InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
14323 std::string AsmStr = IA->getAsmString();
14324 SmallVector<StringRef, 4> AsmPieces;
14325 SplitString(AsmStr, AsmPieces, ";\n");
14326
14327 switch (AsmPieces.size()) {
14328 default: return false;
14329 case 1:
14330 AsmStr = AsmPieces[0];
14331 AsmPieces.clear();
14332 SplitString(AsmStr, AsmPieces, " \t,");
14333
14334 // rev $0, $1
14335 if (AsmPieces.size() == 3 &&
14336 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
14337 IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
14338 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
14339 if (Ty && Ty->getBitWidth() == 32)
14340 return IntrinsicLowering::LowerToByteSwap(CI);
14341 }
14342 break;
14343 }
14344
14345 return false;
14346}
14347
14348const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
14349 // At this point, we have to lower this constraint to something else, so we
14350 // lower it to an "r" or "w". However, by doing this we will force the result
14351 // to be in register, while the X constraint is much more permissive.
14352 //
14353 // Although we are correct (we are free to emit anything, without
14354 // constraints), we might break use cases that would expect us to be more
14355 // efficient and emit something else.
14356 if (!Subtarget->hasVFP2Base())
14357 return "r";
14358 if (ConstraintVT.isFloatingPoint())
14359 return "w";
14360 if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
14361 (ConstraintVT.getSizeInBits() == 64 ||
14362 ConstraintVT.getSizeInBits() == 128))
14363 return "w";
14364
14365 return "r";
14366}
14367
14368/// getConstraintType - Given a constraint letter, return the type of
14369/// constraint it is for this target.
14370ARMTargetLowering::ConstraintType
14371ARMTargetLowering::getConstraintType(StringRef Constraint) const {
14372 unsigned S = Constraint.size();
14373 if (S == 1) {
14374 switch (Constraint[0]) {
14375 default: break;
14376 case 'l': return C_RegisterClass;
14377 case 'w': return C_RegisterClass;
14378 case 'h': return C_RegisterClass;
14379 case 'x': return C_RegisterClass;
14380 case 't': return C_RegisterClass;
14381 case 'j': return C_Immediate; // Constant for movw.
14382 // An address with a single base register. Due to the way we
14383 // currently handle addresses it is the same as an 'r' memory constraint.
14384 case 'Q': return C_Memory;
14385 }
14386 } else if (S == 2) {
14387 switch (Constraint[0]) {
14388 default: break;
14389 case 'T': return C_RegisterClass;
14390 // All 'U+' constraints are addresses.
14391 case 'U': return C_Memory;
14392 }
14393 }
14394 return TargetLowering::getConstraintType(Constraint);
14395}
14396
14397/// Examine constraint type and operand type and determine a weight value.
14398/// This object must already have been set up with the operand type
14399/// and the current alternative constraint selected.
14400TargetLowering::ConstraintWeight
14401ARMTargetLowering::getSingleConstraintMatchWeight(
14402 AsmOperandInfo &info, const char *constraint) const {
14403 ConstraintWeight weight = CW_Invalid;
14404 Value *CallOperandVal = info.CallOperandVal;
14405 // If we don't have a value, we can't do a match,
14406 // but allow it at the lowest weight.
14407 if (!CallOperandVal)
14408 return CW_Default;
14409 Type *type = CallOperandVal->getType();
14410 // Look at the constraint type.
14411 switch (*constraint) {
14412 default:
14413 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
14414 break;
14415 case 'l':
14416 if (type->isIntegerTy()) {
14417 if (Subtarget->isThumb())
14418 weight = CW_SpecificReg;
14419 else
14420 weight = CW_Register;
14421 }
14422 break;
14423 case 'w':
14424 if (type->isFloatingPointTy())
14425 weight = CW_Register;
14426 break;
14427 }
14428 return weight;
14429}
14430
14431using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
14432
14433RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
14434 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
14435 switch (Constraint.size()) {
14436 case 1:
14437 // GCC ARM Constraint Letters
14438 switch (Constraint[0]) {
14439 case 'l': // Low regs or general regs.
14440 if (Subtarget->isThumb())
14441 return RCPair(0U, &ARM::tGPRRegClass);
14442 return RCPair(0U, &ARM::GPRRegClass);
14443 case 'h': // High regs or no regs.
14444 if (Subtarget->isThumb())
14445 return RCPair(0U, &ARM::hGPRRegClass);
14446 break;
14447 case 'r':
14448 if (Subtarget->isThumb1Only())
14449 return RCPair(0U, &ARM::tGPRRegClass);
14450 return RCPair(0U, &ARM::GPRRegClass);
14451 case 'w':
14452 if (VT == MVT::Other)
14453 break;
14454 if (VT == MVT::f32)
14455 return RCPair(0U, &ARM::SPRRegClass);
14456 if (VT.getSizeInBits() == 64)
14457 return RCPair(0U, &ARM::DPRRegClass);
14458 if (VT.getSizeInBits() == 128)
14459 return RCPair(0U, &ARM::QPRRegClass);
14460 break;
14461 case 'x':
14462 if (VT == MVT::Other)
14463 break;
14464 if (VT == MVT::f32)
14465 return RCPair(0U, &ARM::SPR_8RegClass);
14466 if (VT.getSizeInBits() == 64)
14467 return RCPair(0U, &ARM::DPR_8RegClass);
14468 if (VT.getSizeInBits() == 128)
14469 return RCPair(0U, &ARM::QPR_8RegClass);
14470 break;
14471 case 't':
14472 if (VT == MVT::Other)
14473 break;
14474 if (VT == MVT::f32 || VT == MVT::i32)
14475 return RCPair(0U, &ARM::SPRRegClass);
14476 if (VT.getSizeInBits() == 64)
14477 return RCPair(0U, &ARM::DPR_VFP2RegClass);
14478 if (VT.getSizeInBits() == 128)
14479 return RCPair(0U, &ARM::QPR_VFP2RegClass);
14480 break;
14481 }
14482 break;
14483
14484 case 2:
14485 if (Constraint[0] == 'T') {
14486 switch (Constraint[1]) {
14487 default:
14488 break;
14489 case 'e':
14490 return RCPair(0U, &ARM::tGPREvenRegClass);
14491 case 'o':
14492 return RCPair(0U, &ARM::tGPROddRegClass);
14493 }
14494 }
14495 break;
14496
14497 default:
14498 break;
14499 }
14500
14501 if (StringRef("{cc}").equals_lower(Constraint))
14502 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
14503
14504 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
14505}
14506
14507/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
14508/// vector. If it is invalid, don't add anything to Ops.
14509void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
14510 std::string &Constraint,
14511 std::vector<SDValue>&Ops,
14512 SelectionDAG &DAG) const {
14513 SDValue Result;
14514
14515 // Currently only support length 1 constraints.
14516 if (Constraint.length() != 1) return;
14517
14518 char ConstraintLetter = Constraint[0];
14519 switch (ConstraintLetter) {
14520 default: break;
14521 case 'j':
14522 case 'I': case 'J': case 'K': case 'L':
14523 case 'M': case 'N': case 'O':
14524 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
14525 if (!C)
14526 return;
14527
14528 int64_t CVal64 = C->getSExtValue();
14529 int CVal = (int) CVal64;
14530 // None of these constraints allow values larger than 32 bits. Check
14531 // that the value fits in an int.
14532 if (CVal != CVal64)
14533 return;
14534
14535 switch (ConstraintLetter) {
14536 case 'j':
14537 // Constant suitable for movw, must be between 0 and
14538 // 65535.
14539 if (Subtarget->hasV6T2Ops())
14540 if (CVal >= 0 && CVal <= 65535)
14541 break;
14542 return;
14543 case 'I':
14544 if (Subtarget->isThumb1Only()) {
14545 // This must be a constant between 0 and 255, for ADD
14546 // immediates.
14547 if (CVal >= 0 && CVal <= 255)
14548 break;
14549 } else if (Subtarget->isThumb2()) {
14550 // A constant that can be used as an immediate value in a
14551 // data-processing instruction.
14552 if (ARM_AM::getT2SOImmVal(CVal) != -1)
14553 break;
14554 } else {
14555 // A constant that can be used as an immediate value in a
14556 // data-processing instruction.
14557 if (ARM_AM::getSOImmVal(CVal) != -1)
14558 break;
14559 }
14560 return;
14561
14562 case 'J':
14563 if (Subtarget->isThumb1Only()) {
14564 // This must be a constant between -255 and -1, for negated ADD
14565 // immediates. This can be used in GCC with an "n" modifier that
14566 // prints the negated value, for use with SUB instructions. It is
14567 // not useful otherwise but is implemented for compatibility.
14568 if (CVal >= -255 && CVal <= -1)
14569 break;
14570 } else {
14571 // This must be a constant between -4095 and 4095. It is not clear
14572 // what this constraint is intended for. Implemented for
14573 // compatibility with GCC.
14574 if (CVal >= -4095 && CVal <= 4095)
14575 break;
14576 }
14577 return;
14578
14579 case 'K':
14580 if (Subtarget->isThumb1Only()) {
14581 // A 32-bit value where only one byte has a nonzero value. Exclude
14582 // zero to match GCC. This constraint is used by GCC internally for
14583 // constants that can be loaded with a move/shift combination.
14584 // It is not useful otherwise but is implemented for compatibility.
14585 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
14586 break;
14587 } else if (Subtarget->isThumb2()) {
14588 // A constant whose bitwise inverse can be used as an immediate
14589 // value in a data-processing instruction. This can be used in GCC
14590 // with a "B" modifier that prints the inverted value, for use with
14591 // BIC and MVN instructions. It is not useful otherwise but is
14592 // implemented for compatibility.
14593 if (ARM_AM::getT2SOImmVal(~CVal) != -1)
14594 break;
14595 } else {
14596 // A constant whose bitwise inverse can be used as an immediate
14597 // value in a data-processing instruction. This can be used in GCC
14598 // with a "B" modifier that prints the inverted value, for use with
14599 // BIC and MVN instructions. It is not useful otherwise but is
14600 // implemented for compatibility.
14601 if (ARM_AM::getSOImmVal(~CVal) != -1)
14602 break;
14603 }
14604 return;
14605
14606 case 'L':
14607 if (Subtarget->isThumb1Only()) {
14608 // This must be a constant between -7 and 7,
14609 // for 3-operand ADD/SUB immediate instructions.
14610 if (CVal >= -7 && CVal < 7)
14611 break;
14612 } else if (Subtarget->isThumb2()) {
14613 // A constant whose negation can be used as an immediate value in a
14614 // data-processing instruction. This can be used in GCC with an "n"
14615 // modifier that prints the negated value, for use with SUB
14616 // instructions. It is not useful otherwise but is implemented for
14617 // compatibility.
14618 if (ARM_AM::getT2SOImmVal(-CVal) != -1)
14619 break;
14620 } else {
14621 // A constant whose negation can be used as an immediate value in a
14622 // data-processing instruction. This can be used in GCC with an "n"
14623 // modifier that prints the negated value, for use with SUB
14624 // instructions. It is not useful otherwise but is implemented for
14625 // compatibility.
14626 if (ARM_AM::getSOImmVal(-CVal) != -1)
14627 break;
14628 }
14629 return;
14630
14631 case 'M':
14632 if (Subtarget->isThumb1Only()) {
14633 // This must be a multiple of 4 between 0 and 1020, for
14634 // ADD sp + immediate.
14635 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
14636 break;
14637 } else {
14638 // A power of two or a constant between 0 and 32. This is used in
14639 // GCC for the shift amount on shifted register operands, but it is
14640 // useful in general for any shift amounts.
14641 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
14642 break;
14643 }
14644 return;
14645
14646 case 'N':
14647 if (Subtarget->isThumb()) { // FIXME thumb2
14648 // This must be a constant between 0 and 31, for shift amounts.
14649 if (CVal >= 0 && CVal <= 31)
14650 break;
14651 }
14652 return;
14653
14654 case 'O':
14655 if (Subtarget->isThumb()) { // FIXME thumb2
14656 // This must be a multiple of 4 between -508 and 508, for
14657 // ADD/SUB sp = sp + immediate.
14658 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
14659 break;
14660 }
14661 return;
14662 }
14663 Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
14664 break;
14665 }
14666
14667 if (Result.getNode()) {
14668 Ops.push_back(Result);
14669 return;
14670 }
14671 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
14672}
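// [Editorial sketch, not part of the original file] A hypothetical use of a
// GCC-style ARM immediate constraint that reaches the hook above: Clang turns
// the "I" operand into a constant SDNode, and LowerAsmOperandForConstraint
// checks it against the Thumb1/Thumb2/ARM rules before emitting it.
int add255(int x) {
  int r;
  __asm__("add %0, %1, %2" : "=r"(r) : "r"(x), "I"(255));
  return r;
}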
14673
14674 static RTLIB::Libcall getDivRemLibcall(
14675 const SDNode *N, MVT::SimpleValueType SVT) {
14676 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
14677 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
14678 "Unhandled Opcode in getDivRemLibcall");
14679 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
14680 N->getOpcode() == ISD::SREM;
14681 RTLIB::Libcall LC;
14682 switch (SVT) {
14683 default: llvm_unreachable("Unexpected request for libcall!");
14684 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
14685 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
14686 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
14687 case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
14688 }
14689 return LC;
14690}
14691
14692 static TargetLowering::ArgListTy getDivRemArgList(
14693 const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
14694 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
14695 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
14696 "Unhandled Opcode in getDivRemArgList");
14697 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
14698 N->getOpcode() == ISD::SREM;
14699 TargetLowering::ArgListTy Args;
14700 TargetLowering::ArgListEntry Entry;
14701 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
14702 EVT ArgVT = N->getOperand(i).getValueType();
14703 Type *ArgTy = ArgVT.getTypeForEVT(*Context);
14704 Entry.Node = N->getOperand(i);
14705 Entry.Ty = ArgTy;
14706 Entry.IsSExt = isSigned;
14707 Entry.IsZExt = !isSigned;
14708 Args.push_back(Entry);
14709 }
14710 if (Subtarget->isTargetWindows() && Args.size() >= 2)
14711 std::swap(Args[0], Args[1]);
14712 return Args;
14713}
14714
14715SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
14716 assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
14717 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
14718 Subtarget->isTargetWindows()) &&
14719 "Register-based DivRem lowering only");
14720 unsigned Opcode = Op->getOpcode();
14721 assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
14722 "Invalid opcode for Div/Rem lowering");
14723 bool isSigned = (Opcode == ISD::SDIVREM);
14724 EVT VT = Op->getValueType(0);
14725 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
14726 SDLoc dl(Op);
14727
14728 // If the target has hardware divide, use divide + multiply + subtract:
14729 // div = a / b
14730 // rem = a - b * div
14731 // return {div, rem}
14732 // This should be lowered into UDIV/SDIV + MLS later on.
14733 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
14734 : Subtarget->hasDivideInARMMode();
14735 if (hasDivide && Op->getValueType(0).isSimple() &&
14736 Op->getSimpleValueType(0) == MVT::i32) {
14737 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
14738 const SDValue Dividend = Op->getOperand(0);
14739 const SDValue Divisor = Op->getOperand(1);
14740 SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
14741 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
14742 SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
14743
14744 SDValue Values[2] = {Div, Rem};
14745 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
14746 }
14747
14748 RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
14749 VT.getSimpleVT().SimpleTy);
14750 SDValue InChain = DAG.getEntryNode();
14751
14752 TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
14753 DAG.getContext(),
14754 Subtarget);
14755
14756 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
14757 getPointerTy(DAG.getDataLayout()));
14758
14759 Type *RetTy = StructType::get(Ty, Ty);
14760
14761 if (Subtarget->isTargetWindows())
14762 InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
14763
14764 TargetLowering::CallLoweringInfo CLI(DAG);
14765 CLI.setDebugLoc(dl).setChain(InChain)
14766 .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
14767 .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
14768
14769 std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
14770 return CallInfo.first;
14771}
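// [Editorial sketch, not part of the original file] Source that produces the
// ISD::SDIVREM node lowered above; the function name is made up. On AEABI
// targets without hardware divide this becomes a single divmod libcall
// (e.g. __aeabi_idivmod); with hardware divide it becomes SDIV + MLS,
// computing rem = a - b * div as described in the comment above.
void quotrem(int a, int b, int *q, int *r) {
  *q = a / b; // merged with the next line into one SDIVREM node
  *r = a % b;
}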
14772
14773// Lowers REM using divmod helpers
14774// see RTABI section 4.2/4.3
14775SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
14776 // Build return types (div and rem)
14777 std::vector<Type*> RetTyParams;
14778 Type *RetTyElement;
14779
14780 switch (N->getValueType(0).getSimpleVT().SimpleTy) {
14781 default: llvm_unreachable("Unexpected request for libcall!");
14782 case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
14783 case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
14784 case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
14785 case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
14786 }
14787
14788 RetTyParams.push_back(RetTyElement);
14789 RetTyParams.push_back(RetTyElement);
14790 ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
14791 Type *RetTy = StructType::get(*DAG.getContext(), ret);
14792
14793 RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
14794 SimpleTy);
14795 SDValue InChain = DAG.getEntryNode();
14796 TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
14797 Subtarget);
14798 bool isSigned = N->getOpcode() == ISD::SREM;
14799 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
14800 getPointerTy(DAG.getDataLayout()));
14801
14802 if (Subtarget->isTargetWindows())
14803 InChain = WinDBZCheckDenominator(DAG, N, InChain);
14804
14805 // Lower call
14806 CallLoweringInfo CLI(DAG);
14807 CLI.setChain(InChain)
14808 .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
14809 .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
14810 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
14811
14812 // Return second (rem) result operand (first contains div)
14813 SDNode *ResNode = CallResult.first.getNode();
14814 assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
14815 return ResNode->getOperand(1);
14816}
14817
14818SDValue
14819ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
14820 assert(Subtarget->isTargetWindows() && "unsupported target platform");
14821 SDLoc DL(Op);
14822
14823 // Get the inputs.
14824 SDValue Chain = Op.getOperand(0);
14825 SDValue Size = Op.getOperand(1);
14826
14827 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
14828 "no-stack-arg-probe")) {
14829 unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
14830 SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
14831 Chain = SP.getValue(1);
14832 SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
14833 if (Align)
14834 SP = DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
14835 DAG.getConstant(-(uint64_t)Align, DL, MVT::i32));
14836 Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
14837 SDValue Ops[2] = { SP, Chain };
14838 return DAG.getMergeValues(Ops, DL);
14839 }
14840
14841 SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
14842 DAG.getConstant(2, DL, MVT::i32));
14843
14844 SDValue Flag;
14845 Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
14846 Flag = Chain.getValue(1);
14847
14848 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
14849 Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
14850
14851 SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
14852 Chain = NewSP.getValue(1);
14853
14854 SDValue Ops[2] = { NewSP, Chain };
14855 return DAG.getMergeValues(Ops, DL);
14856}
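// [Editorial sketch, not part of the original file] A dynamic stack
// allocation that takes the Windows path above: the byte count is shifted
// right by 2 into R4 (a count of words), ARMISD::WIN__CHKSTK probes the
// stack, and the adjusted SP is copied back out.
int sum_dyn(unsigned n) {
  int *buf = (int *)__builtin_alloca(n * sizeof(int));
  int s = 0;
  for (unsigned i = 0; i < n; ++i) { buf[i] = (int)i; s += buf[i]; }
  return s;
}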
14857
14858SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
14859 SDValue SrcVal = Op.getOperand(0);
14860 const unsigned DstSz = Op.getValueType().getSizeInBits();
14861 const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
14862 assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
14863 "Unexpected type for custom-lowering FP_EXTEND");
14864
14865 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
14866 "With both FP DP and 16, any FP conversion is legal!");
14867
14868 assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
14869 "With FP16, 16 to 32 conversion is legal!");
14870
14871 // Either we are converting from 16 -> 64 without FP16 and/or without
14872 // FP double-precision (Armv8-fp), so we must do it in two
14873 // steps.
14874 // Or we are converting from 32 -> 64 without FP double-precision, or from
14875 // 16 -> 32 without FP16; then we must use a function call.
14876 SDLoc Loc(Op);
14877 RTLIB::Libcall LC;
14878 if (SrcSz == 16) {
14879 // Instruction from 16 -> 32
14880 if (Subtarget->hasFP16())
14881 SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, MVT::f32, SrcVal);
14882 // Lib call from 16 -> 32
14883 else {
14884 LC = RTLIB::getFPEXT(MVT::f16, MVT::f32);
14885 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
14886 "Unexpected type for custom-lowering FP_EXTEND");
14887 SrcVal =
14888 makeLibCall(DAG, LC, MVT::f32, SrcVal, /*isSigned*/ false, Loc).first;
14889 }
14890 }
14891
14892 if (DstSz != 64)
14893 return SrcVal;
14894 // SrcVal is now guaranteed to be 32 bits
14895 if (Subtarget->hasFP64()) // Instruction from 32 -> 64
14896 return DAG.getNode(ISD::FP_EXTEND, Loc, MVT::f64, SrcVal);
14897
14898 LC = RTLIB::getFPEXT(MVT::f32, MVT::f64);
14899 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
14900 "Unexpected type for custom-lowering FP_EXTEND");
14901 return makeLibCall(DAG, LC, MVT::f64, SrcVal, /*isSigned*/ false, Loc).first;
14902}
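// [Editorial sketch, not part of the original file] The two-step extension
// described above, written with Clang's __fp16 extension. The libcall names
// are ABI-dependent (e.g. __aeabi_h2f / __aeabi_f2d on AEABI targets).
double widen(__fp16 h) {
  float f = (float)h; // 16 -> 32: VCVTB.F32.F16 with FP16, else a libcall
  return (double)f;   // 32 -> 64: VCVT.F64.F32 with FP64, else a libcall
}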
14903
14904SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
14905 SDValue SrcVal = Op.getOperand(0);
14906 EVT SrcVT = SrcVal.getValueType();
14907 EVT DstVT = Op.getValueType();
14908 const unsigned DstSz = Op.getValueType().getSizeInBits();
14909 const unsigned SrcSz = SrcVT.getSizeInBits();
14910 (void)DstSz;
14911 assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
14912 "Unexpected type for custom-lowering FP_ROUND");
14913
14914 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
14915 "With both FP DP and 16, any FP conversion is legal!");
14916
14917 SDLoc Loc(Op);
14918
14919 // Single instruction for 32 -> 16 when FP16 is available
14920 if (SrcSz == 32 && Subtarget->hasFP16())
14921 return Op;
14922
14923 // Lib call from 32 -> 16 / 64 -> [32, 16]
14924 RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
14925 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
14926 "Unexpected type for custom-lowering FP_ROUND");
14927 return makeLibCall(DAG, LC, DstVT, SrcVal, /*isSigned*/ false, Loc).first;
14928}
14929
14930void ARMTargetLowering::lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
14931 SelectionDAG &DAG) const {
14932 assert(N->getValueType(0) == MVT::i64 && "Unexpected type (!= i64) on ABS.");
14933 MVT HalfT = MVT::i32;
14934 SDLoc dl(N);
14935 SDValue Hi, Lo, Tmp;
14936
14937 if (!isOperationLegalOrCustom(ISD::ADDCARRY, HalfT) ||
14938 !isOperationLegalOrCustom(ISD::UADDO, HalfT))
14939 return ;
14940
14941 unsigned OpTypeBits = HalfT.getScalarSizeInBits();
14942 SDVTList VTList = DAG.getVTList(HalfT, MVT::i1);
14943
14944 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
14945 DAG.getConstant(0, dl, HalfT));
14946 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
14947 DAG.getConstant(1, dl, HalfT));
14948
14949 Tmp = DAG.getNode(ISD::SRA, dl, HalfT, Hi,
14950 DAG.getConstant(OpTypeBits - 1, dl,
14951 getShiftAmountTy(HalfT, DAG.getDataLayout())));
14952 Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo);
14953 Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi,
14954 SDValue(Lo.getNode(), 1));
14955 Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi);
14956 Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo);
14957
14958 Results.push_back(Lo);
14959 Results.push_back(Hi);
14960}
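// [Editorial sketch, not part of the original file] The same branchless ABS
// idiom in scalar form; the code above performs the 64-bit add as a
// UADDO/ADDCARRY pair on the Lo/Hi halves and then applies the two XORs.
long long abs64(long long x) {
  long long sign = x >> 63;  // Tmp: the sign bit replicated across the word
  return (x + sign) ^ sign;  // abs(x) = (x + sign) ^ sign
}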
14961
14962bool
14963 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
14964 // The ARM target isn't yet aware of offsets.
14965 return false;
14966}
14967
14968 bool ARM::isBitFieldInvertedMask(unsigned v) {
14969 if (v == 0xffffffff)
14970 return false;
14971
14972 // there can be 1's on either or both "outsides", all the "inside"
14973 // bits must be 0's
14974 return isShiftedMask_32(~v);
14975}
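// [Editorial note] Sample values for isBitFieldInvertedMask, derived from the
// rule above (~v must be a single contiguous run of ones):
//   0xffff00ff -> true  (~v == 0x0000ff00, one shifted 8-bit mask)
//   0xff00ff00 -> false (~v == 0x00ff00ff, two separate runs)
//   0xffffffff -> false (rejected explicitly)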
14976
14977/// isFPImmLegal - Returns true if the target can instruction select the
14978/// specified FP immediate natively. If false, the legalizer will
14979/// materialize the FP immediate as a load from a constant pool.
14980 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
14981 bool ForCodeSize) const {
14982 if (!Subtarget->hasVFP3Base())
14983 return false;
14984 if (VT == MVT::f16 && Subtarget->hasFullFP16())
14985 return ARM_AM::getFP16Imm(Imm) != -1;
14986 if (VT == MVT::f32)
14987 return ARM_AM::getFP32Imm(Imm) != -1;
14988 if (VT == MVT::f64 && Subtarget->hasFP64())
14989 return ARM_AM::getFP64Imm(Imm) != -1;
14990 return false;
14991}
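// [Editorial note] The VMOV-immediate encodings probed above accept only
// values of the form +/-m * 2^e with a 4-bit mantissa and a 3-bit exponent:
//   double a = 1.0; // encodable: getFP64Imm(Imm) != -1, emitted as VMOV.F64
//   double b = 0.1; // not encodable: materialized via a constant-pool load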
14992
14993/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
14994/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
14995/// specified in the intrinsic calls.
14996 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
14997 const CallInst &I,
14998 MachineFunction &MF,
14999 unsigned Intrinsic) const {
15000 switch (Intrinsic) {
15001 case Intrinsic::arm_neon_vld1:
15002 case Intrinsic::arm_neon_vld2:
15003 case Intrinsic::arm_neon_vld3:
15004 case Intrinsic::arm_neon_vld4:
15005 case Intrinsic::arm_neon_vld2lane:
15006 case Intrinsic::arm_neon_vld3lane:
15007 case Intrinsic::arm_neon_vld4lane:
15008 case Intrinsic::arm_neon_vld2dup:
15009 case Intrinsic::arm_neon_vld3dup:
15010 case Intrinsic::arm_neon_vld4dup: {
15011 Info.opc = ISD::INTRINSIC_W_CHAIN;
15012 // Conservatively set memVT to the entire set of vectors loaded.
15013 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
15014 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
15015 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
15016 Info.ptrVal = I.getArgOperand(0);
15017 Info.offset = 0;
15018 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
15019 Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
15020 // volatile loads with NEON intrinsics not supported
15021 Info.flags = MachineMemOperand::MOLoad;
15022 return true;
15023 }
15024 case Intrinsic::arm_neon_vld1x2:
15025 case Intrinsic::arm_neon_vld1x3:
15026 case Intrinsic::arm_neon_vld1x4: {
15027 Info.opc = ISD::INTRINSIC_W_CHAIN;
15028 // Conservatively set memVT to the entire set of vectors loaded.
15029 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
15030 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
15031 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
15032 Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
15033 Info.offset = 0;
15034 Info.align = 0;
15035 // volatile loads with NEON intrinsics not supported
15036 Info.flags = MachineMemOperand::MOLoad;
15037 return true;
15038 }
15039 case Intrinsic::arm_neon_vst1:
15040 case Intrinsic::arm_neon_vst2:
15041 case Intrinsic::arm_neon_vst3:
15042 case Intrinsic::arm_neon_vst4:
15043 case Intrinsic::arm_neon_vst2lane:
15044 case Intrinsic::arm_neon_vst3lane:
15045 case Intrinsic::arm_neon_vst4lane: {
15046 Info.opc = ISD::INTRINSIC_VOID;
15047 // Conservatively set memVT to the entire set of vectors stored.
15048 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
15049 unsigned NumElts = 0;
15050 for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
15051 Type *ArgTy = I.getArgOperand(ArgI)->getType();
15052 if (!ArgTy->isVectorTy())
15053 break;
15054 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
15055 }
15056 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
15057 Info.ptrVal = I.getArgOperand(0);
15058 Info.offset = 0;
15059 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
15060 Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
15061 // volatile stores with NEON intrinsics not supported
15062 Info.flags = MachineMemOperand::MOStore;
15063 return true;
15064 }
15065 case Intrinsic::arm_neon_vst1x2:
15066 case Intrinsic::arm_neon_vst1x3:
15067 case Intrinsic::arm_neon_vst1x4: {
15068 Info.opc = ISD::INTRINSIC_VOID;
15069 // Conservatively set memVT to the entire set of vectors stored.
15070 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
15071 unsigned NumElts = 0;
15072 for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
15073 Type *ArgTy = I.getArgOperand(ArgI)->getType();
15074 if (!ArgTy->isVectorTy())
15075 break;
15076 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
15077 }
15078 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
15079 Info.ptrVal = I.getArgOperand(0);
15080 Info.offset = 0;
15081 Info.align = 0;
15082 // volatile stores with NEON intrinsics not supported
15083 Info.flags = MachineMemOperand::MOStore;
15084 return true;
15085 }
15086 case Intrinsic::arm_ldaex:
15087 case Intrinsic::arm_ldrex: {
15088 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
15089 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
15090 Info.opc = ISD::INTRINSIC_W_CHAIN;
15091 Info.memVT = MVT::getVT(PtrTy->getElementType());
15092 Info.ptrVal = I.getArgOperand(0);
15093 Info.offset = 0;
15094 Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
15095 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
15096 return true;
15097 }
15098 case Intrinsic::arm_stlex:
15099 case Intrinsic::arm_strex: {
15100 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
15101 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
15102 Info.opc = ISD::INTRINSIC_W_CHAIN;
15103 Info.memVT = MVT::getVT(PtrTy->getElementType());
15104 Info.ptrVal = I.getArgOperand(1);
15105 Info.offset = 0;
15106 Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
15107 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
15108 return true;
15109 }
15110 case Intrinsic::arm_stlexd:
15111 case Intrinsic::arm_strexd:
15112 Info.opc = ISD::INTRINSIC_W_CHAIN;
15113 Info.memVT = MVT::i64;
15114 Info.ptrVal = I.getArgOperand(2);
15115 Info.offset = 0;
15116 Info.align = 8;
15117 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
15118 return true;
15119
15120 case Intrinsic::arm_ldaexd:
15121 case Intrinsic::arm_ldrexd:
15122 Info.opc = ISD::INTRINSIC_W_CHAIN;
15123 Info.memVT = MVT::i64;
15124 Info.ptrVal = I.getArgOperand(0);
15125 Info.offset = 0;
15126 Info.align = 8;
15127 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
15128 return true;
15129
15130 default:
15131 break;
15132 }
15133
15134 return false;
15135}
15136
15137/// Returns true if it is beneficial to convert a load of a constant
15138/// to just the constant itself.
15139 bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
15140 Type *Ty) const {
15141 assert(Ty->isIntegerTy());
15142
15143 unsigned Bits = Ty->getPrimitiveSizeInBits();
15144 if (Bits == 0 || Bits > 32)
15145 return false;
15146 return true;
15147}
15148
15149 bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
15150 unsigned Index) const {
15151 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
15152 return false;
15153
15154 return (Index == 0 || Index == ResVT.getVectorNumElements());
15155}
15156
15157 Instruction *ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
15158 ARM_MB::MemBOpt Domain) const {
15159 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
15160
15161 // First, if the target has no DMB, see what fallback we can use.
15162 if (!Subtarget->hasDataBarrier()) {
15163 // Some ARMv6 cpus can support data barriers with an mcr instruction.
15164 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
15165 // here.
15166 if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
15167 Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
15168 Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
15169 Builder.getInt32(0), Builder.getInt32(7),
15170 Builder.getInt32(10), Builder.getInt32(5)};
15171 return Builder.CreateCall(MCR, args);
15172 } else {
15173 // Instead of using barriers, atomic accesses on these subtargets use
15174 // libcalls.
15175 llvm_unreachable("makeDMB on a target so old that it has no barriers");
15176 }
15177 } else {
15178 Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
15179 // Only a full system barrier exists in the M-class architectures.
15180 Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
15181 Constant *CDomain = Builder.getInt32(Domain);
15182 return Builder.CreateCall(DMB, CDomain);
15183 }
15184}
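// [Editorial note] The two barrier forms produced above, as assembly:
//   with DMB support:  dmb ish                      (Intrinsic::arm_dmb)
//   ARMv6 fallback:    mcr p15, 0, r0, c7, c10, 5   (Intrinsic::arm_mcr)
// The six constants passed to Builder.CreateCall are exactly the coprocessor
// operands of that CP15 data-memory-barrier encoding.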
15185
15186// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
15187 Instruction *ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
15188 Instruction *Inst,
15189 AtomicOrdering Ord) const {
15190 switch (Ord) {
15191 case AtomicOrdering::NotAtomic:
15192 case AtomicOrdering::Unordered:
15193 llvm_unreachable("Invalid fence: unordered/non-atomic");
15194 case AtomicOrdering::Monotonic:
15195 case AtomicOrdering::Acquire:
15196 return nullptr; // Nothing to do
15197 case AtomicOrdering::SequentiallyConsistent:
15198 if (!Inst->hasAtomicStore())
15199 return nullptr; // Nothing to do
15200 LLVM_FALLTHROUGH;
15201 case AtomicOrdering::Release:
15202 case AtomicOrdering::AcquireRelease:
15203 if (Subtarget->preferISHSTBarriers())
15204 return makeDMB(Builder, ARM_MB::ISHST);
15205 // FIXME: add a comment with a link to documentation justifying this.
15206 else
15207 return makeDMB(Builder, ARM_MB::ISH);
15208 }
15209 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
15210}
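// [Editorial sketch, not part of the original file] With fences inserted for
// atomics, a sequentially consistent store follows the cited C++11 mapping:
//   dmb ish   <- emitLeadingFence (dmb ishst when preferISHSTBarriers())
//   str ...   <- the store itself
//   dmb ish   <- emitTrailingFence
#include <atomic>
std::atomic<int> flag;
void publish() { flag.store(1, std::memory_order_seq_cst); }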
15211
15212 Instruction *ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
15213 Instruction *Inst,
15214 AtomicOrdering Ord) const {
15215 switch (Ord) {
15216 case AtomicOrdering::NotAtomic:
15217 case AtomicOrdering::Unordered:
15218 llvm_unreachable("Invalid fence: unordered/not-atomic");
15219 case AtomicOrdering::Monotonic:
15220 case AtomicOrdering::Release:
15221 return nullptr; // Nothing to do
15222 case AtomicOrdering::Acquire:
15223 case AtomicOrdering::AcquireRelease:
15224 case AtomicOrdering::SequentiallyConsistent:
15225 return makeDMB(Builder, ARM_MB::ISH);
15226 }
15227 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
15228}
15229
15230// Loads and stores less than 64-bits are already atomic; ones above that
15231// are doomed anyway, so defer to the default libcall and blame the OS when
15232// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
15233// anything for those.
15234 bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
15235 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
15236 return (Size == 64) && !Subtarget->isMClass();
15237}
15238
15239// Loads and stores less than 64-bits are already atomic; ones above that
15240// are doomed anyway, so defer to the default libcall and blame the OS when
15241// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
15242// anything for those.
15243// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
15244// guarantee, see DDI0406C ARM architecture reference manual,
15245// sections A8.8.72-74 LDRD)
15246 TargetLowering::AtomicExpansionKind
15247 ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
15248 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
15249 return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
15250 : AtomicExpansionKind::None;
15251 }
15252
15253// For the real atomic operations, we have ldrex/strex up to 32 bits,
15254// and up to 64 bits on the non-M profiles
15255 TargetLowering::AtomicExpansionKind
15256 ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
15257 if (AI->isFloatingPointOperation())
15258 return AtomicExpansionKind::CmpXChg;
15259
15260 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
15261 bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
15262 return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW)
15263 ? AtomicExpansionKind::LLSC
15264 : AtomicExpansionKind::None;
15265 }
15266
15267 TargetLowering::AtomicExpansionKind
15268 ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
15269 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
15270 // implement cmpxchg without spilling. If the address being exchanged is also
15271 // on the stack and close enough to the spill slot, this can lead to a
15272 // situation where the monitor always gets cleared and the atomic operation
15273 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
15274 bool HasAtomicCmpXchg =
15275 !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
15276 if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg)
15277 return AtomicExpansionKind::LLSC;
15278 return AtomicExpansionKind::None;
15279 }
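// [Editorial sketch] When LLSC is returned, AtomicExpandPass rewrites the
// cmpxchg into a retry loop built from the emitLoadLinked and
// emitStoreConditional hooks defined below, schematically:
//   loop:
//     ldrex   r2, [r0]         ; load-linked the current value
//     cmp     r2, r_expected
//     bne     done             ; compare failed, no store attempted
//     strex   r3, r_new, [r0]  ; store-conditional, r3 == 0 on success
//     cmp     r3, #0
//     bne     loop             ; reservation lost, retry
//   done: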
15280
15281 bool ARMTargetLowering::shouldInsertFencesForAtomic(
15282 const Instruction *I) const {
15283 return InsertFencesForAtomic;
15284}
15285
15286// This has so far only been implemented for MachO.
15287 bool ARMTargetLowering::useLoadStackGuardNode() const {
15288 return Subtarget->isTargetMachO();
15289}
15290
15291 void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
15292 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
15293 return TargetLowering::insertSSPDeclarations(M);
15294
15295 // MSVC CRT has a global variable holding security cookie.
15296 M.getOrInsertGlobal("__security_cookie",
15297 Type::getInt8PtrTy(M.getContext()));
15298
15299 // MSVC CRT has a function to validate security cookie.
15300 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
15301 "__security_check_cookie", Type::getVoidTy(M.getContext()),
15302 Type::getInt8PtrTy(M.getContext()));
15303 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
15304 F->addAttribute(1, Attribute::AttrKind::InReg);
15305}
15306
15307 Value *ARMTargetLowering::getSDagStackGuard(const Module &M) const {
15308 // MSVC CRT has a global variable holding security cookie.
15309 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
15310 return M.getGlobalVariable("__security_cookie");
15311 return TargetLowering::getSDagStackGuard(M);
15312 }
15313
15314 Function *ARMTargetLowering::getSSPStackGuardCheck(const Module &M) const {
15315 // MSVC CRT has a function to validate security cookie.
15316 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
15317 return M.getFunction("__security_check_cookie");
15318 return TargetLowering::getSSPStackGuardCheck(M);
15319 }
15320
15321 bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
15322 unsigned &Cost) const {
15323 // If we do not have NEON, vector types are not natively supported.
15324 if (!Subtarget->hasNEON())
15325 return false;
15326
15327 // Floating point values and vector values map to the same register file.
15328 // Therefore, although we could do a store extract of a vector type, this is
15329 // better to leave at float as we have more freedom in the addressing mode for
15330 // those.
15331 if (VectorTy->isFPOrFPVectorTy())
15332 return false;
15333
15334 // If the index is unknown at compile time, this is very expensive to lower
15335 // and it is not possible to combine the store with the extract.
15336 if (!isa<ConstantInt>(Idx))
15337 return false;
15338
15339 assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
15340 unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
15341 // We can do a store + vector extract on any vector that fits perfectly in a D
15342 // or Q register.
15343 if (BitWidth == 64 || BitWidth == 128) {
15344 Cost = 0;
15345 return true;
15346 }
15347 return false;
15348}
15349
15350 bool ARMTargetLowering::isCheapToSpeculateCttz() const {
15351 return Subtarget->hasV6T2Ops();
15352}
15353
15354 bool ARMTargetLowering::isCheapToSpeculateCtlz() const {
15355 return Subtarget->hasV6T2Ops();
15356}
15357
15358 bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
15359 return !Subtarget->hasMinSize();
15360}
15361
15362 Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
15363 AtomicOrdering Ord) const {
15364 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
15365 Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
15366 bool IsAcquire = isAcquireOrStronger(Ord);
15367
15368 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
15369 // intrinsic must return {i32, i32} and we have to recombine them into a
15370 // single i64 here.
15371 if (ValTy->getPrimitiveSizeInBits() == 64) {
15372 Intrinsic::ID Int =
15373 IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
15374 Function *Ldrex = Intrinsic::getDeclaration(M, Int);
15375
15376 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
15377 Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
15378
15379 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
15380 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
15381 if (!Subtarget->isLittle())
15382 std::swap (Lo, Hi);
15383 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
15384 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
15385 return Builder.CreateOr(
15386 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
15387 }
15388
15389 Type *Tys[] = { Addr->getType() };
15390 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
15391 Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
15392
15393 return Builder.CreateTruncOrBitCast(
15394 Builder.CreateCall(Ldrex, Addr),
15395 cast<PointerType>(Addr->getType())->getElementType());
15396}
15397
15398 void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
15399 IRBuilder<> &Builder) const {
15400 if (!Subtarget->hasV7Ops())
15401 return;
15402 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
15403 Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
15404}
15405
15406 Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
15407 Value *Addr,
15408 AtomicOrdering Ord) const {
15409 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
15410 bool IsRelease = isReleaseOrStronger(Ord);
15411
15412 // Since the intrinsics must have legal type, the i64 intrinsics take two
15413 // parameters: "i32, i32". We must marshal Val into the appropriate form
15414 // before the call.
15415 if (Val->getType()->getPrimitiveSizeInBits() == 64) {
15416 Intrinsic::ID Int =
15417 IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
15418 Function *Strex = Intrinsic::getDeclaration(M, Int);
15419 Type *Int32Ty = Type::getInt32Ty(M->getContext());
15420
15421 Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
15422 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
15423 if (!Subtarget->isLittle())
15424 std::swap(Lo, Hi);
15425 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
15426 return Builder.CreateCall(Strex, {Lo, Hi, Addr});
15427 }
15428
15429 Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
15430 Type *Tys[] = { Addr->getType() };
15431 Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
15432
15433 return Builder.CreateCall(
15434 Strex, {Builder.CreateZExtOrBitCast(
15435 Val, Strex->getFunctionType()->getParamType(0)),
15436 Addr});
15437}
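// [Editorial sketch] For the 64-bit case the Lo/Hi marshalling above pairs
// with emitLoadLinked's ldrexd, yielding a loop of roughly this shape
// (register choices illustrative; Lo/Hi are swapped on big-endian):
//   loop:
//     ldrexd  r2, r3, [r0]       ; 64-bit load-linked into a register pair
//     ...                        ; compute the new value into r6:r7
//     strexd  r4, r6, r7, [r0]   ; store-conditional of the pair
//     cmp     r4, #0
//     bne     loop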
15438
15439
15440 bool ARMTargetLowering::alignLoopsWithOptSize() const {
15441 return Subtarget->isMClass();
15442}
15443
15444/// A helper function for determining the number of interleaved accesses we
15445/// will generate when lowering accesses of the given type.
15446unsigned
15447 ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
15448 const DataLayout &DL) const {
15449 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
15450}
15451
15452 bool ARMTargetLowering::isLegalInterleavedAccessType(
15453 VectorType *VecTy, const DataLayout &DL) const {
15454
15455 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
15456 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
15457
15458 // Ensure the vector doesn't have f16 elements. Even though we could do an
15459 // i16 vldN, we can't hold the f16 vectors and will end up converting via
15460 // f32.
15461 if (VecTy->getElementType()->isHalfTy())
15462 return false;
15463
15464 // Ensure the number of vector elements is greater than 1.
15465 if (VecTy->getNumElements() < 2)
15466 return false;
15467
15468 // Ensure the element type is legal.
15469 if (ElSize != 8 && ElSize != 16 && ElSize != 32)
15470 return false;
15471
15472 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
15473 // 128 will be split into multiple interleaved accesses.
15474 return VecSize == 64 || VecSize % 128 == 0;
15475}
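// [Editorial note] Worked examples of the two helpers above:
//   <8 x i8>,   64 bits  -> legal; (64 + 127) / 128  = 1 interleaved access
//   <16 x i32>, 512 bits -> legal; (512 + 127) / 128 = 4 interleaved accesses
//   <8 x half>           -> rejected: f16 element type
//   <6 x i32>,  192 bits -> rejected: neither 64 nor a multiple of 128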
15476
15477/// Lower an interleaved load into a vldN intrinsic.
15478///
15479/// E.g. Lower an interleaved load (Factor = 2):
15480/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
15481/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
15482/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
15483///
15484/// Into:
15485/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
15486/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
15487/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
15488 bool ARMTargetLowering::lowerInterleavedLoad(
15489 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
15490 ArrayRef<unsigned> Indices, unsigned Factor) const {
15491 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
15492 "Invalid interleave factor");
15493 assert(!Shuffles.empty() && "Empty shufflevector input");
15494 assert(Shuffles.size() == Indices.size() &&
15495 "Unmatched number of shufflevectors and indices");
15496
15497 VectorType *VecTy = Shuffles[0]->getType();
15498 Type *EltTy = VecTy->getVectorElementType();
15499
15500 const DataLayout &DL = LI->getModule()->getDataLayout();
15501
15502 // Skip if we do not have NEON and skip illegal vector types. We can
15503 // "legalize" wide vector types into multiple interleaved accesses as long as
15504 // the vector types are divisible by 128.
15505 if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
15506 return false;
15507
15508 unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
15509
15510 // A pointer vector can not be the return type of the ldN intrinsics. Need to
15511 // load integer vectors first and then convert to pointer vectors.
15512 if (EltTy->isPointerTy())
15513 VecTy =
15514 VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
15515
15516 IRBuilder<> Builder(LI);
15517
15518 // The base address of the load.
15519 Value *BaseAddr = LI->getPointerOperand();
15520
15521 if (NumLoads > 1) {
15522 // If we're going to generate more than one load, reset the sub-vector type
15523 // to something legal.
15524 VecTy = VectorType::get(VecTy->getVectorElementType(),
15525 VecTy->getVectorNumElements() / NumLoads);
15526
15527 // We will compute the pointer operand of each load from the original base
15528 // address using GEPs. Cast the base address to a pointer to the scalar
15529 // element type.
15530 BaseAddr = Builder.CreateBitCast(
15531 BaseAddr, VecTy->getVectorElementType()->getPointerTo(
15532 LI->getPointerAddressSpace()));
15533 }
15534
15535 assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
15536
15537 Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
15538 Type *Tys[] = {VecTy, Int8Ptr};
15539 static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
15540 Intrinsic::arm_neon_vld3,
15541 Intrinsic::arm_neon_vld4};
15542 Function *VldnFunc =
15543 Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
15544
15545 // Holds sub-vectors extracted from the load intrinsic return values. The
15546 // sub-vectors are associated with the shufflevector instructions they will
15547 // replace.
15548 DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
15549
15550 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
15551 // If we're generating more than one load, compute the base address of
15552 // subsequent loads as an offset from the previous.
15553 if (LoadCount > 0)
15554 BaseAddr =
15555 Builder.CreateConstGEP1_32(VecTy->getVectorElementType(), BaseAddr,
15556 VecTy->getVectorNumElements() * Factor);
15557
15558 SmallVector<Value *, 2> Ops;
15559 Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
15560 Ops.push_back(Builder.getInt32(LI->getAlignment()));
15561
15562 CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
15563
15564 // Replace uses of each shufflevector with the corresponding vector loaded
15565 // by ldN.
15566 for (unsigned i = 0; i < Shuffles.size(); i++) {
15567 ShuffleVectorInst *SV = Shuffles[i];
15568 unsigned Index = Indices[i];
15569
15570 Value *SubVec = Builder.CreateExtractValue(VldN, Index);
15571
15572 // Convert the integer vector to pointer vector if the element is pointer.
15573 if (EltTy->isPointerTy())
15574 SubVec = Builder.CreateIntToPtr(
15575 SubVec, VectorType::get(SV->getType()->getVectorElementType(),
15576 VecTy->getVectorNumElements()));
15577
15578 SubVecs[SV].push_back(SubVec);
15579 }
15580 }
15581
15582 // Replace uses of the shufflevector instructions with the sub-vectors
15583 // returned by the load intrinsic. If a shufflevector instruction is
15584 // associated with more than one sub-vector, those sub-vectors will be
15585 // concatenated into a single wide vector.
15586 for (ShuffleVectorInst *SVI : Shuffles) {
15587 auto &SubVec = SubVecs[SVI];
15588 auto *WideVec =
15589 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
15590 SVI->replaceAllUsesWith(WideVec);
15591 }
15592
15593 return true;
15594}
15595
15596/// Lower an interleaved store into a vstN intrinsic.
15597///
15598/// E.g. Lower an interleaved store (Factor = 3):
15599/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
15600/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
15601/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
15602///
15603/// Into:
15604/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
15605/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
15606/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
15607/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
15608///
15609/// Note that the new shufflevectors will be removed and we'll only generate one
15610/// vst3 instruction in CodeGen.
15611///
15612/// Example for a more general valid mask (Factor 3). Lower:
15613/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
15614/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
15615/// store <12 x i32> %i.vec, <12 x i32>* %ptr
15616///
15617/// Into:
15618/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
15619/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
15620/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
15621/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
15622 bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
15623 ShuffleVectorInst *SVI,
15624 unsigned Factor) const {
15625 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
15626 "Invalid interleave factor");
15627
15628 VectorType *VecTy = SVI->getType();
15629 assert(VecTy->getVectorNumElements() % Factor == 0 &&
15630 "Invalid interleaved store");
15631
15632 unsigned LaneLen = VecTy->getVectorNumElements() / Factor;
15633 Type *EltTy = VecTy->getVectorElementType();
15634 VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
15635
15636 const DataLayout &DL = SI->getModule()->getDataLayout();
15637
15638 // Skip if we do not have NEON and skip illegal vector types. We can
15639 // "legalize" wide vector types into multiple interleaved accesses as long as
15640 // the vector types are divisible by 128.
15641 if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
15642 return false;
15643
15644 unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
15645
15646 Value *Op0 = SVI->getOperand(0);
15647 Value *Op1 = SVI->getOperand(1);
15648 IRBuilder<> Builder(SI);
15649
15650 // StN intrinsics don't support pointer vectors as arguments. Convert pointer
15651 // vectors to integer vectors.
15652 if (EltTy->isPointerTy()) {
15653 Type *IntTy = DL.getIntPtrType(EltTy);
15654
15655 // Convert to the corresponding integer vector.
15656 Type *IntVecTy =
15657 VectorType::get(IntTy, Op0->getType()->getVectorNumElements());
15658 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
15659 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
15660
15661 SubVecTy = VectorType::get(IntTy, LaneLen);
15662 }
15663
15664 // The base address of the store.
15665 Value *BaseAddr = SI->getPointerOperand();
15666
15667 if (NumStores > 1) {
15668 // If we're going to generate more than one store, reset the lane length
15669 // and sub-vector type to something legal.
15670 LaneLen /= NumStores;
15671 SubVecTy = VectorType::get(SubVecTy->getVectorElementType(), LaneLen);
15672
15673 // We will compute the pointer operand of each store from the original base
15674 // address using GEPs. Cast the base address to a pointer to the scalar
15675 // element type.
15676 BaseAddr = Builder.CreateBitCast(
15677 BaseAddr, SubVecTy->getVectorElementType()->getPointerTo(
15678 SI->getPointerAddressSpace()));
15679 }
15680
15681 assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
15682
15683 auto Mask = SVI->getShuffleMask();
15684
15685 Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
15686 Type *Tys[] = {Int8Ptr, SubVecTy};
15687 static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
15688 Intrinsic::arm_neon_vst3,
15689 Intrinsic::arm_neon_vst4};
15690
15691 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
15692 // If we're generating more than one store, we compute the base address of
15693 // subsequent stores as an offset from the previous.
15694 if (StoreCount > 0)
15695 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getVectorElementType(),
15696 BaseAddr, LaneLen * Factor);
15697
15698 SmallVector<Value *, 6> Ops;
15699 Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
15700
15701 Function *VstNFunc =
15702 Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
15703
15704 // Split the shufflevector operands into sub vectors for the new vstN call.
15705 for (unsigned i = 0; i < Factor; i++) {
15706 unsigned IdxI = StoreCount * LaneLen * Factor + i;
15707 if (Mask[IdxI] >= 0) {
15708 Ops.push_back(Builder.CreateShuffleVector(
15709 Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0)));
15710 } else {
15711 unsigned StartMask = 0;
15712 for (unsigned j = 1; j < LaneLen; j++) {
15713 unsigned IdxJ = StoreCount * LaneLen * Factor + j;
15714 if (Mask[IdxJ * Factor + IdxI] >= 0) {
15715 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
15716 break;
15717 }
15718 }
15719 // Note: If all elements in a chunk are undefs, StartMask=0!
15720 // Note: Filling undef gaps with random elements is ok, since
15721 // those elements were being written anyway (with undefs).
15722 // In the case of all undefs we're defaulting to using elems from 0
15723 // Note: StartMask cannot be negative, it's checked in
15724 // isReInterleaveMask
15725 Ops.push_back(Builder.CreateShuffleVector(
15726 Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0)));
15727 }
15728 }
15729
15730 Ops.push_back(Builder.getInt32(SI->getAlignment()));
15731 Builder.CreateCall(VstNFunc, Ops);
15732 }
15733 return true;
15734}
15735
15736 enum HABaseType {
15737 HA_UNKNOWN = 0,
15738 HA_FLOAT,
15739 HA_DOUBLE,
15740 HA_VECT64,
15741 HA_VECT128
15742 };
15743
15744 static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
15745 uint64_t &Members) {
15746 if (auto *ST = dyn_cast<StructType>(Ty)) {
15747 for (unsigned i = 0; i < ST->getNumElements(); ++i) {
15748 uint64_t SubMembers = 0;
15749 if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
15750 return false;
15751 Members += SubMembers;
15752 }
15753 } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
15754 uint64_t SubMembers = 0;
15755 if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
15756 return false;
15757 Members += SubMembers * AT->getNumElements();
15758 } else if (Ty->isFloatTy()) {
15759 if (Base != HA_UNKNOWN && Base != HA_FLOAT)
15760 return false;
15761 Members = 1;
15762 Base = HA_FLOAT;
15763 } else if (Ty->isDoubleTy()) {
15764 if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
15765 return false;
15766 Members = 1;
15767 Base = HA_DOUBLE;
15768 } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
15769 Members = 1;
15770 switch (Base) {
15771 case HA_FLOAT:
15772 case HA_DOUBLE:
15773 return false;
15774 case HA_VECT64:
15775 return VT->getBitWidth() == 64;
15776 case HA_VECT128:
15777 return VT->getBitWidth() == 128;
15778 case HA_UNKNOWN:
15779 switch (VT->getBitWidth()) {
15780 case 64:
15781 Base = HA_VECT64;
15782 return true;
15783 case 128:
15784 Base = HA_VECT128;
15785 return true;
15786 default:
15787 return false;
15788 }
15789 }
15790 }
15791
15792 return (Members > 0 && Members <= 4);
15793}
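// [Editorial sketch, not part of the original file] How this classification
// treats a few aggregates:
struct Vec3  { float x, y, z; };     // HA: Base = HA_FLOAT, Members = 3
struct Mat2  { double m[2][2]; };    // HA: Base = HA_DOUBLE, Members = 4
struct Mixed { float f; double d; }; // not an HA: base element types differ
struct Five  { float f[5]; };        // not an HA: Members > 4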
15794
15795/// Return the correct alignment for the current calling convention.
15796unsigned
15797 ARMTargetLowering::getABIAlignmentForCallingConv(Type *ArgTy,
15798 DataLayout DL) const {
15799 if (!ArgTy->isVectorTy())
15800 return DL.getABITypeAlignment(ArgTy);
15801
15802 // Avoid over-aligning vector parameters. It would require realigning the
15803 // stack and waste space for no real benefit.
15804 return std::min(DL.getABITypeAlignment(ArgTy), DL.getStackAlignment());
15805}
15806
15807/// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
15808/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
15809/// passing according to AAPCS rules.
15810 bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
15811 Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
15812 if (getEffectiveCallingConv(CallConv, isVarArg) !=
15813 CallingConv::ARM_AAPCS_VFP)
15814 return false;
15815
15816 HABaseType Base = HA_UNKNOWN;
15817 uint64_t Members = 0;
15818 bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
15819 LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
15820
15821 bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
15822 return IsHA || IsIntArray;
15823}
15824
15825 unsigned ARMTargetLowering::getExceptionPointerRegister(
15826 const Constant *PersonalityFn) const {
15827 // Platforms which do not use SjLj EH may return values in these registers
15828 // via the personality function.
15829 return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R0;
15830}
15831
15832 unsigned ARMTargetLowering::getExceptionSelectorRegister(
15833 const Constant *PersonalityFn) const {
15834 // Platforms which do not use SjLj EH may return values in these registers
15835 // via the personality function.
15836 return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1;
15837}
15838
15839void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
15840 // Update IsSplitCSR in ARMFunctionInfo.
15841 ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
15842 AFI->setIsSplitCSR(true);
15843}
15844
15845void ARMTargetLowering::insertCopiesSplitCSR(
15846 MachineBasicBlock *Entry,
15847 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
15848 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
15849 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
15850 if (!IStart)
15851 return;
15852
15853 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
15854 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
15855 MachineBasicBlock::iterator MBBI = Entry->begin();
15856 for (const MCPhysReg *I = IStart; *I; ++I) {
15857 const TargetRegisterClass *RC = nullptr;
15858 if (ARM::GPRRegClass.contains(*I))
15859 RC = &ARM::GPRRegClass;
15860 else if (ARM::DPRRegClass.contains(*I))
15861 RC = &ARM::DPRRegClass;
15862 else
15863 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
15864
15865 unsigned NewVR = MRI->createVirtualRegister(RC);
15866 // Create copy from CSR to a virtual register.
15867 // FIXME: this currently does not emit CFI pseudo-instructions, it works
15868 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
15869 // nounwind. If we want to generalize this later, we may need to emit
15870 // CFI pseudo-instructions.
15871 assert(Entry->getParent()->getFunction().hasFnAttribute(
15872 Attribute::NoUnwind) &&
15873 "Function should be nounwind in insertCopiesSplitCSR!");
15874 Entry->addLiveIn(*I);
15875 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
15876 .addReg(*I);
15877
15878 // Insert the copy-back instructions right before the terminator.
15879 for (auto *Exit : Exits)
15880 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
15881 TII->get(TargetOpcode::COPY), *I)
15882 .addReg(NewVR);
15883 }
15884}
15885
15886 void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const {
15887 MF.getRegInfo().freezeReservedRegs(MF);
15888 TargetLoweringBase::finalizeLowering(MF);
15889 }
unsigned const MachineRegisterInfo * MRI
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
static bool areExtractExts(Value *Ext1, Value *Ext2)
Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth of the vector elements.
static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, unsigned AlignCheck)
static EVT getExtensionTo64Bits(const EVT &OrigVT)
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, bool isSigned)
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG)
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG)
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
static bool isSignExtended(SDNode *N, SelectionDAG &DAG)
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG)
static const unsigned PerfectShuffleTable[6561+1]
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static bool isConstant(const MachineInstr &MI)
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG)
static bool isStore(int Opcode)
static bool isThumb(const MCSubtargetInfo &STI)
static bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const TargetInstrInfo *TII)
MatchingStackOffset - Return true if the given stack call argument is already available in the same p...
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN)
FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, SelectionDAG &DAG)
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
@ HA_DOUBLE
@ HA_VECT128
@ HA_VECT64
@ HA_FLOAT
@ HA_UNKNOWN
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total value size to 64 bits.
static cl::opt< unsigned > ConstpoolPromotionMaxSize("arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), cl::init(64))
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static bool isVTBLMask(ArrayRef< int > M, EVT VT)
static cl::opt< bool > EnableConstpoolPromotion("arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into " "constant pools"), cl::init(false))
static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, bool is128Bits, NEONModImmType type)
isNEONModifiedImm - Check if the specified splat value corresponds to a valid vector constant for a N...
static const APInt * isPowerOf2Constant(SDValue V)
static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) can replace combinations of ...
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG)
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC)
IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, SelectionDAG &DAG)
static bool isGTorGE(ISD::CondCode CC)
static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1) intrinsic,...
static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask)
static bool isReverseMask(ArrayRef< int > M, EVT VT)
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG)
static bool isVZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of "vector_shuffle v,...
static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG)
static bool isVTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformShiftCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
PerformShiftCombine - Checks for immediate versions of vector shifts and lowers them.
static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG)
static SDValue PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
static bool isSRL16(const SDValue &Op)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static bool isLTorLE(ISD::CondCode CC)
static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl, SelectionDAG &DAG)
static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG)
static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock *BB, const TargetRegisterInfo *TRI)
static SDValue PerformBFICombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool hasNormalLoadOperand(SDNode *N)
hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node are normal,...
static SDValue PerformInsertEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformInsertEltCombine - Target-specific dag combine xforms for ISD::INSERT_VECTOR_ELT.
static cl::opt< unsigned > ConstpoolPromotionMaxTotal("arm-promote-constant-max-total", cl::Hidden, cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128))
static RTLIB::Libcall getDivRemLibcall(const SDNode *N, MVT::SimpleValueType SVT)
static SDValue PerformABSCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG)
SkipLoadExtensionForVMULL - return a load of the original vector size that does not do any sign/zero ...
static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static const MCPhysReg GPRArgRegs[]
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1.
static bool isVZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue PerformORCombineToSMULWBT(SDNode *OR, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool isVTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of "vector_shuffle v,...
static SDValue FindBFIToCombineWith(SDNode *N)
static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, SelectionDAG &DAG)
ShuffleOpCodes
@ OP_VEXT3
@ OP_VTRNR
@ OP_VDUP1
@ OP_VZIPR
@ OP_VUZPR
@ OP_VREV
@ OP_VZIPL
@ OP_VTRNL
@ OP_COPY
@ OP_VEXT1
@ OP_VDUP0
@ OP_VEXT2
@ OP_VUZPL
@ OP_VDUP3
@ OP_VDUP2
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps)
static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static bool isS16(const SDValue &Op, SelectionDAG &DAG)
static bool isSRA16(const SDValue &Op)
static SDValue AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerInterruptReturn(SmallVectorImpl< SDValue > &RetOps, const SDLoc &DL, SelectionDAG &DAG)
static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl, SelectionDAG &DAG)
static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue &RetVal1, SDValue &RetVal2)
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool isSaturatingConditional(const SDValue &Op, SDValue &V, uint64_t &K, bool &usat)
static bool isUpperSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K)
static bool isSHL16(const SDValue &Op)
static bool isVEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseVEXT, unsigned &Imm)
static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2)
Return the load opcode for a given load size.
static bool isLegalT2AddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
static bool isLegalMVEShuffleOp(unsigned PFEntry)
static bool isVUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG)
PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for ISD::VECTOR_SHUFFLE.
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG)
SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending load,...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
static SDValue PerformAddcSubcCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static TargetLowering::ArgListTy getDivRemArgList(const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget)
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getZeroVector - Returns a vector of specified type with all zero elements.
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG)
static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static cl::opt< bool > ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true))
static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformORCombineToBFI(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
ExpandBITCAST - If the target supports VFP, this function is called to expand a bit convert where eit...
static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC, bool &Invert, SDValue &OtherOp, SelectionDAG &DAG)
static SDValue PerformLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue PerformAddeSubeCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void ReplaceCMP_SWAP_64Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue PerformVDUPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K)
static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned StSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment store operation with given size.
static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, NEON load/store intrinsics,...
static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD.
static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain)
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMULCombine - Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the special multi...
static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformORCombine - Target-specific dag combine xforms for ISD::OR.
static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG)
static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned SelectPairHalf(unsigned Elements, ArrayRef< int > Mask, unsigned Index)
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG)
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment load operation with given size.
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG)
PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl)
static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
std::pair< unsigned, const TargetRegisterClass * > RCPair
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,...
static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG)
BC is a bitcast that is about to be turned into a VMOVDRR.
static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, const SDLoc &dl)
static unsigned isNEONTwoResultShuffleMask(ArrayRef< int > ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF)
Check if ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), and return the corresponding AR...
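For illustration, these are the canonical 4-element masks this predicate family recognizes (indices 0-3 select from the first input vector, 4-7 from the second; a sketch, not code from this file):
int VTRNMask0[] = {0, 4, 2, 6}, VTRNMask1[] = {1, 5, 3, 7}; // transpose
int VUZPMask0[] = {0, 2, 4, 6}, VUZPMask1[] = {1, 3, 5, 7}; // unzip
int VZIPMask0[] = {0, 4, 1, 5}, VZIPMask1[] = {2, 6, 3, 7}; // zip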
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B)
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
static bool allUsersAreInFunction(const Value *V, const Function *F)
Return true if all users of V are within function F, looking through ConstantExprs.
static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformSTORECombine - Target-specific dag combine xforms for ISD::STORE.
static bool isSingletonVEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG)
PerformVMOVDRRCombine - Target-specific dag combine xforms for ARMISD::VMOVDRR.
static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, SDValue &SatK)
static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
isLegalAddressImmediate - Return true if the integer value can be used as the offset of the target ad...
static bool isLegalT1AddressImmediate(int64_t V, EVT VT)
static SDValue CombineANDShift(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG)
static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSHLSimplify(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformADDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDECombine - Target-specific dag combine transform from ARMISD::ADDC, ARMISD::ADDE,...
static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformHWLoopCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG)
static bool isVUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of "vector_shuffle v,...
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members)
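For example, under the AAPCS-VFP calling convention a homogeneous aggregate of four floats is passed entirely in consecutive VFP registers; the type below is a hypothetical illustration of what this predicate accepts:
struct HFA { float a, b, c, d; }; // base type float (HA_FLOAT), Members == 4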
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) can replace combinations of ...
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget)
canChangeToInt - Given the fp compare operand, return true if it is suitable to morph to an integer c...
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2)
Return the store opcode for a given store size.
static bool IsVUZPShuffleNode(SDNode *N)
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node)
Attaches vregs to MEMCPY that it will use as scratch registers when it is expanded into LDM/STM.
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
static SDValue findMUL_LOHI(SDValue V)
static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformBUILD_VECTORCombine - Target-specific dag combine xforms for ISD::BUILD_VECTOR.
static SDValue PerformVDUPLANECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformVDUPLANECombine - Target-specific dag combine xforms for ARMISD::VDUPLANE.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:250
#define LLVM_DEBUG(X)
Definition: Debug.h:122
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:168
APInt bitcastToAPInt() const
Definition: APFloat.h:1104
Class for arbitrary precision integers.
Definition: APInt.h:69
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1562
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:878
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1532
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:813
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1273
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1508
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:561
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1184
unsigned countPopulation() const
Count the number of bits set.
Definition: APInt.h:1657
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1631
unsigned logBase2() const
Definition: APInt.h:1747
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:481
bool isAllOnesValue() const
Determine if all bits are set.
Definition: APInt.h:395
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:463
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:647
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:635
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:587
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1574
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:977
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:970
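A small usage sketch of the APInt helpers documented above (the value is arbitrary):
#include "llvm/ADT/APInt.h"
using llvm::APInt;

APInt V(32, 0xFF00);                          // 32-bit value 0x0000ff00
unsigned TZ   = V.countTrailingZeros();       // 8
unsigned Pop  = V.countPopulation();          // 8
bool Pow2     = V.isPowerOf2();               // false: more than one bit set
APInt LowByte = APInt::getLowBitsSet(32, 8);  // 0x000000ff
APInt Half    = V.trunc(16);                  // 0xff00 as a 16-bit value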
virtual const ARMBaseRegisterInfo & getRegisterInfo() const =0
const uint32_t * getSjLjDispatchPreservedMask(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
Register getFrameRegister(const MachineFunction &MF) const override
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
const uint32_t * getTLSCallPreservedMask(const MachineFunction &MF) const
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
SmallPtrSet< const GlobalVariable *, 2 > & getGlobalsPromotedToConstantPool()
void setPromotedConstpoolIncrease(int Sz)
void setArgRegsSaveSize(unsigned s)
void setReturnRegsCount(unsigned s)
void setVarArgsFrameIndex(int Index)
unsigned getArgRegsSaveSize() const
void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV)
Indicate to the backend that GV has had its storage changed to inside a constant pool.
void setArgumentStackSize(unsigned size)
bool isTargetMachO() const
Definition: ARMSubtarget.h:695
bool hasVMLxForwarding() const
Definition: ARMSubtarget.h:634
bool hasFPAO() const
Definition: ARMSubtarget.h:641
bool isThumb() const
Definition: ARMSubtarget.h:749
bool hasRetAddrStack() const
Definition: ARMSubtarget.h:662
bool hasNEON() const
Definition: ARMSubtarget.h:607
bool useMovt() const
bool isTargetAEABI() const
Definition: ARMSubtarget.h:704
bool hasV6Ops() const
Definition: ARMSubtarget.h:566
bool hasARMOps() const
Definition: ARMSubtarget.h:601
bool supportsTailCall() const
Definition: ARMSubtarget.h:781
const Triple & getTargetTriple() const
Definition: ARMSubtarget.h:682
bool hasVFP4Base() const
Definition: ARMSubtarget.h:605
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:521
bool isThumb1Only() const
Definition: ARMSubtarget.h:751
bool hasV5TOps() const
Definition: ARMSubtarget.h:564
bool hasThumb2() const
Definition: ARMSubtarget.h:753
bool hasFullFP16() const
Definition: ARMSubtarget.h:674
bool hasFPARMv8Base() const
Definition: ARMSubtarget.h:606
bool isThumb2() const
Definition: ARMSubtarget.h:752
bool isTargetWindows() const
Definition: ARMSubtarget.h:691
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
bool useSjLjEH() const
Definition: ARMSubtarget.h:667
bool hasDivideInThumbMode() const
Definition: ARMSubtarget.h:621
bool isTargetDarwin() const
Definition: ARMSubtarget.h:684
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:533
bool hasVFP2Base() const
Definition: ARMSubtarget.h:603
bool isTargetAndroid() const
Definition: ARMSubtarget.h:735
bool isROPI() const
bool isTargetCOFF() const
Definition: ARMSubtarget.h:693
bool isTargetGNUAEABI() const
Definition: ARMSubtarget.h:709
bool hasVFP3Base() const
Definition: ARMSubtarget.h:604
bool isAPCS_ABI() const
bool isTargetWatchOS() const
Definition: ARMSubtarget.h:686
bool preferISHSTBarriers() const
Definition: ARMSubtarget.h:646
bool hasLOB() const
Definition: ARMSubtarget.h:614
bool hasFP64() const
Definition: ARMSubtarget.h:636
bool genLongCalls() const
Definition: ARMSubtarget.h:669
bool hasMinSize() const
Definition: ARMSubtarget.h:750
bool isFPBrccSlow() const
Definition: ARMSubtarget.h:635
unsigned getPrefLoopAlignment() const
Definition: ARMSubtarget.h:856
bool isTargetIOS() const
Definition: ARMSubtarget.h:685
bool useNEONForSinglePrecisionFP() const
Definition: ARMSubtarget.h:617
const InstrItineraryData * getInstrItineraryData() const override
getInstrItineraryData - Return the instruction itineraries based on subtarget selection.
Definition: ARMSubtarget.h:808
bool isTargetWatchABI() const
Definition: ARMSubtarget.h:687
bool hasDSP() const
Definition: ARMSubtarget.h:665
bool hasV7Ops() const
Definition: ARMSubtarget.h:570
bool hasDataBarrier() const
Definition: ARMSubtarget.h:623
bool hasAnyDataBarrier() const
Definition: ARMSubtarget.h:628
bool isAAPCS_ABI() const
bool isRWPI() const
bool isLittle() const
Definition: ARMSubtarget.h:789
bool allowsUnalignedMem() const
Definition: ARMSubtarget.h:783
bool isTargetMuslAEABI() const
Definition: ARMSubtarget.h:714
bool useSoftFloat() const
Definition: ARMSubtarget.h:748
bool hasFPRegs16() const
Definition: ARMSubtarget.h:583
bool hasMPExtension() const
Definition: ARMSubtarget.h:664
bool hasMVEFloatOps() const
Definition: ARMSubtarget.h:581
bool hasFPRegs() const
Definition: ARMSubtarget.h:582
bool isMClass() const
Definition: ARMSubtarget.h:754
bool hasDivideInARMMode() const
Definition: ARMSubtarget.h:622
bool hasV6T2Ops() const
Definition: ARMSubtarget.h:569
bool hasV5TEOps() const
Definition: ARMSubtarget.h:565
bool isTargetHardFloat() const
bool useMulOps() const
Definition: ARMSubtarget.h:632
bool isTargetELF() const
Definition: ARMSubtarget.h:694
bool hasV8MBaselineOps() const
Definition: ARMSubtarget.h:577
bool useNaClTrap() const
Definition: ARMSubtarget.h:666
bool hasMVEIntegerOps() const
Definition: ARMSubtarget.h:580
bool hasFP16() const
Definition: ARMSubtarget.h:672
bool hasPerfMon() const
Definition: ARMSubtarget.h:637
bool hasAcquireRelease() const
Definition: ARMSubtarget.h:626
bool genExecuteOnly() const
Definition: ARMSubtarget.h:670
bool isReadOnly(const GlobalValue *GV) const
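Lowering decisions throughout this file are guarded by these predicates; a representative, simplified pattern (ST stands for the ARMSubtarget pointer threaded through the static Perform*Combine helpers above):
// Simplified feature guard in the style of this file's combines.
if (ST->hasNEON() && !ST->isThumb1Only()) {
  // use a NEON lowering
} else if (ST->hasV6T2Ops()) {
  // use a Thumb-2 / ARMv6T2 instruction sequence
} else {
  // fall back to a libcall or generic expansion
}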
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, unsigned &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount though its operand,...
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
const ARMSubtarget * getSubtarget() const
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const
int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override
getScalingFactorCost - Return the cost of the scaling used in addressing mode represented by AM.
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const
Returns true if the addressing mode represented by AM is legal for the Thumb1 target,...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
unsigned getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override
Return true if SHIFT instructions should be expanded to SHIFT_PARTS instructions, and false if a libr...
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns true if the given (atomic) store should be expanded by the IR-level AtomicExpand pass into an...
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
unsigned getABIAlignmentForCallingConv(Type *ArgTy, DataLayout DL) const override
Return the correct alignment for the current calling convention.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize=false) const override
isFPImmLegal - Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: 'sub y, (xor x, -1)' and 'add (add x, 1), y'. The variant with two add's is IR...
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
bool ExpandInlineAsm(CallInst *CI) const override
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const
PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to fold a pair of shifts into a mask.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the value type to use for ISD::SETCC.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
createFastISel - This method returns a target specific FastISel object, or null if the target does no...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Value * emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
bool isCheapToSpeculateCttz() const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type SrcTy to type DstTy.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
isShuffleMaskLegal - Targets can use this to indicate that they only support some VECTOR_SHUFFLE oper...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
Instruction * makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const
bool useLoadStackGuardNode() const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const override
getRegClassFor - Return the register class that should be used for the specified value type.
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override
Returns true if an argument of type Ty needs to be passed in a contiguous block of registers in calli...
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Value * emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
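A minimal sketch, assuming an i32 pointee and ignoring the i64/ldrexd and acquire-ordering (ldaex) cases, of what the load-linked hook boils down to on ARM: emitting a call to the llvm.arm.ldrex intrinsic (helper name hypothetical, not the exact implementation):
// Hypothetical helper; the real hook also handles i64 and acquire orderings.
llvm::Value *loadLinkedSketch(llvm::IRBuilder<> &Builder, llvm::Value *Addr,
                              llvm::Module *M) {
  llvm::Function *Ldrex = llvm::Intrinsic::getDeclaration(
      M, llvm::Intrinsic::arm_ldrex, Addr->getType());
  return Builder.CreateCall(Ldrex, Addr, "ll");
}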
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, bool *Fast) const override
allowsMisalignedMemoryAccesses - Returns true if the target allows unaligned memory accesses of the s...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vstN intrinsic.
bool isCheapToSpeculateCtlz() const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const
PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
const char * LowerXConstraint(EVT ConstraintVT) const override
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const override
Instruction * emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const
Instruction * emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool isVectorLoadExtDesirable(SDValue ExtVal) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override
Return true if the target can combine store(extractelement VectorTy, Idx).
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vldN intrinsic.
bool useSoftFloat() const override
bool alignLoopsWithOptSize() const override
Should loops be aligned even when the function is marked OptSize (but not MinSize).
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from Ty1 to Ty2 is permitted when deciding whether a call is in tail ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
unsigned getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPostIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mo...
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:137
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
iterator begin() const
Definition: ArrayRef.h:136
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:143
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:530
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:693
bool isFloatingPointOperation() const
Definition: Instructions.h:825
@ Sub
*p = old - v
Definition: Instructions.h:711
bool hasFnAttribute(Attribute::AttrKind Kind) const
Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but may be faster.
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
The address of a basic block.
Definition: Constants.h:839
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power of 2,...
CCState - This class holds information needed while lowering arguments and return values.
void getInRegsParamInfo(unsigned InRegsParamRecordIndex, unsigned &BeginReg, unsigned &EndReg) const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
unsigned AllocateReg(unsigned Reg)
AllocateReg - Attempt to allocate one register.
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
void rewindByValRegsInfo()
unsigned getInRegsParamsProcessed() const
void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd)
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
unsigned getInRegsParamsCount() const
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
unsigned getLocMemOffset() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
unsigned getValNo() const
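These two classes work in tandem; the canonical argument-lowering pattern over them, a sketch in the shape of LowerFormalArguments rather than verbatim file code (CallConv, isVarArg, MF, Ins, and DAG are assumed to be in scope):
// Sketch: assign incoming args to registers/stack, then visit each slot.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
for (CCValAssign &VA : ArgLocs) {
  if (VA.isRegLoc()) {
    // copy the incoming value out of VA.getLocReg()
  } else {
    // create a fixed stack object at VA.getLocMemOffset() and load from it
  }
}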
Value * getCalledValue() const
Definition: InstrTypes.h:1280
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
BBTy * getParent() const
Get the basic block containing the call site.
Definition: CallSite.h:101
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Definition: CallSite.h:279
const APFloat & getValueAPF() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:263
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:756
unsigned getPreferredAlignment(const GlobalVariable *GV) const
Returns the preferred alignment of the specified global.
Definition: DataLayout.cpp:834
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:746
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:766
bool isBigEndian() const
Definition: DataLayout.h:233
unsigned getStackAlignment() const
Definition: DataLayout.h:268
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:469
StringRef getPrivateGlobalPrefix() const
Definition: DataLayout.h:316
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:601
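Typical DataLayout queries as used by the lowering code; a sketch where DL comes from the SelectionDAG and Ty is some IR type in scope:
const llvm::DataLayout &DL = DAG.getDataLayout();
uint64_t Size  = DL.getTypeAllocSize(Ty);               // size incl. tail padding
unsigned Align = DL.getABITypeAlignment(Ty);            // minimum ABI alignment
llvm::Type *IPT = DL.getIntPtrType(*DAG.getContext());  // pointer-sized integer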
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:125
iterator begin()
Definition: DenseMap.h:99
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
LLVM_NODISCARD bool empty() const
Definition: DenseMap.h:122
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
TargetLoweringBase::ArgListTy ArgListTy
Definition: FastISel.h:69
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:165
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool CanLowerReturn
CanLowerReturn - true iff the function's return value can be lowered to registers.
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:163
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:323
const GlobalValue * getGlobal() const
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:446
bool hasDLLImportStorageClass() const
Definition: GlobalValue.h:265
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:546
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:383
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:126
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:388
IntegerType * getInt16Ty()
Fetch the type representing a 16-bit integer.
Definition: IRBuilder.h:378
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:343
PointerType * getInt8PtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer to an 8-bit integer value.
Definition: IRBuilder.h:421
Type * getVoidTy()
Fetch the type representing void.
Definition: IRBuilder.h:416
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:373
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:779
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1961
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1951
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1207
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1294
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2305
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1979
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1878
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2193
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1946
Value * CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1735
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1874
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2290
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1941
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1228
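A self-contained illustration of combining the builder calls above, e.g. widening two i16 halves into one i32 value (LoHalf and HiHalf are assumed i16 Values; this is not code from this file):
// Sketch: zero-extend both halves, shift the high half up, then OR them.
llvm::Value *Lo = Builder.CreateZExt(LoHalf, Builder.getInt32Ty());
llvm::Value *Hi = Builder.CreateZExt(HiHalf, Builder.getInt32Ty());
llvm::Value *Full =
    Builder.CreateOr(Lo, Builder.CreateShl(Hi, Builder.getInt32(16)));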
const std::string & getConstraintString() const
Definition: InlineAsm.h:81
const std::string & getAsmString() const
Definition: InlineAsm.h:80
int getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
bool isEmpty() const
Returns true if there are no itineraries.
bool hasAtomicStore() const
Return true if this atomic instruction stores to memory.
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:55
const BasicBlock * getParent() const
Definition: Instruction.h:66
Class to represent integer types.
Definition: DerivedTypes.h:40
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:66
static bool LowerToByteSwap(CallInst *CI)
Try to replace a call instruction with a call to a bswap intrinsic.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
An instruction for reading from memory.
Definition: Instructions.h:167
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:290
Value * getPointerOperand()
Definition: Instructions.h:284
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:240
This class is used to represent ISD::LOAD nodes.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:164
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:582
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:211
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:226
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specific constraint if it is set.
Definition: MCInstrDesc.h:188
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:175
bool isOptionalDef() const
Set if this operand is an optional def.
Definition: MCInstrDesc.h:95
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
static mvt_range integer_valuetypes()
unsigned getScalarSizeInBits() const
static mvt_range vector_valuetypes()
static mvt_range integer_vector_valuetypes()
static MVT getVectorVT(MVT VT, unsigned NumElements)
unsigned getSizeInBits() const
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static mvt_range fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
std::vector< MachineBasicBlock * >::iterator succ_iterator
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, unsigned Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
void computeMaxCallFrameSize(const MachineFunction &MF)
Computes the maximum size of a callframe and the AdjustsStack property.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getFunctionContextIndex() const
Return the index for the function context object.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
MachineJumpTableInfo * getOrCreateJumpTableInfo(unsigned JTEntryKind)
getOrCreateJumpTableInfo - Get the JumpTableInfo for this function, if it does already exist,...
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
const Function & getFunction() const
Return the LLVM function that this machine code represents.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
bool hasCallSiteLandingPad(MCSymbol *Sym)
Return true if the landing pad Eh symbol has an associated call site.
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
void insert(iterator MBBI, MachineBasicBlock *MBB)
SmallVectorImpl< unsigned > & getCallSiteLandingPad(MCSymbol *Sym)
Get the call site indexes for a landing pad EH symbol.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned char TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned char TargetFlags=0) const
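These chained helpers are how EmitInstrWithCustomInserter-style code constructs machine instructions. An illustrative (not verbatim) use, with ARM's predOps/condCodeOp helpers supplying the always-execute predicate and the unset 's'-bit operand; MBB, MI, dl, TII, and DestReg are assumed in scope:
// Sketch: emit 'mov DestReg, #0' before MI; opcode and operands illustrative.
BuildMI(*MBB, MI, dl, TII->get(ARM::t2MOVi), DestReg)
    .addImm(0)
    .add(predOps(ARMCC::AL))  // predicate: always execute
    .add(condCodeOp());       // no 's' bit: flags not set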
Representation of each machine instruction.
Definition: MachineInstr.h:66
bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:432
bool definesRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr fully defines the specified register.
unsigned createJumpTableIndex(const std::vector< MachineBasicBlock * > &DestBBs)
createJumpTableIndex - Create a new jump table.
@ EK_Inline
EK_Inline - Jump table entries are emitted inline at their point of use.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(unsigned Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
bool isVolatile() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
unsigned getAlignment() const
EVT getMemoryVT() const
Return the type of the in-memory value.
CCAssignFn * CCAssignFnForReturn() const
CCAssignFn * CCAssignFnForCall() const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Class to represent pointers.
Definition: DerivedTypes.h:544
Type * getElementType() const
Definition: DerivedTypes.h:563
const PseudoSourceValue * getJumpTable()
Return a pseudo source value referencing a jump table.
const PseudoSourceValue * getGOT()
Return a pseudo source value referencing the global offset table (or something like it).
const PseudoSourceValue * getStack()
Return a pseudo source value referencing the area below the stack frame of a function,...
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
const SDNodeFlags getFlags() const
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
Get the SDNode which holds the desired result.
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
unsigned getScalarValueSizeInBits() const
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getResNo() const
Get the index which selects a specific result in the SDNode.
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
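These accessors support the common look-through idiom; a small sketch under the assumption that single-use bitcasts are safe to skip:
// Peek through one-use bitcasts before pattern matching.
static SDValue peekThroughOneUseBitcasts(SDValue V) {
  while (V.getOpcode() == ISD::BITCAST && V.hasOneUse())
    V = V.getOperand(0);
  return V;
}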
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representation.
Definition: SelectionDAG.h:221
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:470
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands, and they produce a value AND a token chain.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:415
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
unsigned InferPtrAlignment(SDValue Ptr) const
Infer alignment of a load / store address.
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the scalar components.
SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:655
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:416
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:878
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
Definition: SelectionDAG.h:862
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:750
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:413
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:649
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:579
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside the CALLSEQ_START..CALLSEQ_END sequence and OutSize specifies the part of the frame set up prior to the sequence.
Definition: SelectionDAG.h:850
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or truncating it.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:414
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncating it.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:695
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an SDValue.
Definition: SelectionDAG.h:988
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags=0)
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, unsigned Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:592
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:410
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:721
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:420
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:638
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:473
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
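A sketch of how these SelectionDAG builders are typically chained inside a lowering hook (the transform itself is illustrative, not one this file performs):
// Rewrite a negate as (sub 0, x) using getConstant/getNode.
static SDValue lowerNegViaSub(SDValue Op, SelectionDAG &DAG) {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  SDValue Zero = DAG.getConstant(0, dl, VT);
  return DAG.getNode(ISD::SUB, dl, VT, Zero, Op.getOperand(0));
}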
Type * getElementType() const
Definition: DerivedTypes.h:394
uint64_t getNumElements() const
For scalable vectors, this will return the minimum number of elements in the vector.
Definition: DerivedTypes.h:393
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction.
static bool isSplatMask(const int *Mask, EVT VT)
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:417
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
size_t size() const
Definition: SmallVector.h:52
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:315
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:467
typename SuperClass::iterator iterator
Definition: SmallVector.h:319
void push_back(const T &Elt)
Definition: SmallVector.h:211
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:837
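Together, SmallPtrSet and SmallVector enable the worklist pattern used for DAG traversals; a sketch assuming Root is some starting SDNode:
SmallPtrSet<const SDNode *, 16> Visited;
SmallVector<const SDNode *, 8> Worklist;
Worklist.push_back(Root);
while (!Worklist.empty()) {
  const SDNode *N = Worklist.pop_back_val();
  if (!Visited.insert(N).second)
    continue; // already visited
  for (const SDValue &Op : N->op_values())
    Worklist.push_back(Op.getNode());
}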
An instruction for storing to memory.
Definition: Instructions.h:320
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:48
const unsigned char * bytes_end() const
Definition: StringRef.h:108
LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition: StringRef.h:130
const unsigned char * bytes_begin() const
Definition: StringRef.h:105
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:42
LLVM_NODISCARD R Default(T Value)
Definition: StringSwitch.h:181
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:67
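The StringSwitch idiom in one hedged sketch (register names and the 0 sentinel are illustrative):
static unsigned parseRegName(StringRef Name) {
  return StringSwitch<unsigned>(Name)
      .Case("r0", ARM::R0)
      .Case("r1", ARM::R1)
      .Default(0); // 0 = no match
}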
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider type.
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do about it.
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom DAG combiner for by implementing the PerformDAGCombine virtual method.
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predicted right.
void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC)
Override the default CondCode to be used to test the result of the comparison libcall against zero.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
unsigned MaxStoresPerMemcpyOptSize
Maximum number of store operations that may be substituted for a call to memcpy, used for functions with OptSize attribute.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate what to do about it.
const TargetMachine & getTargetMachine() const
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lowering.
void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC)
Set the CallingConv that should be used for the specified libcall.
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nullptr.
void setPrefLoopAlignment(unsigned Align)
Set the target's preferred loop alignment.
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handling, returns the function.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(unsigned Align)
Set the target's minimum function alignment (in log2(bytes))
unsigned MaxStoresPerMemsetOptSize
Maximum number of store operations that may be substituted for the call to memset, used for functions with OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a wider type.
unsigned MaxStoresPerMemmove
Specify maximum bytes of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
unsigned MaxStoresPerMemmoveOptSize
Maximum number of store instructions that may be substituted for a call to memmove, used for functions with OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layout.
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what to do about it.
void setStackPointerRegisterToSaveRestore(unsigned R)
If set to a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save and restore.
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/fp until it can find one that works.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setMinStackArgumentAlignment(unsigned Align)
Set the minimum stack alignment of an argument (in log2(bytes)).
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and its associated "cost".
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate what to do about it.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal'), or we need to promote it to a larger type (return 'Promote'), or we need to expand it into multiple registers of smaller integer type (return 'Expand').
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum bytes of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate what to do about it.
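Taken together, these hooks are usually invoked from a target's TargetLowering constructor; a sketch with illustrative operations and types (Subtarget is an assumed member):
addRegisterClass(MVT::i32, &ARM::GPRRegClass);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i16, Legal);
setTargetDAGCombine(ISD::ADD);
computeRegisterProperties(Subtarget->getRegisterInfo());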
This class defines information used to lower LLVM code to legal SelectionDAG operators that the target instruction selector can accept natively.
bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand ABS nodes.
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL) const
Soften the operands of a comparison.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the calling function.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g. {edx}), return the register number and the register class for the register.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op. At this point, we know that only the DemandedBits bits of the result of Op are ever used downstream.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, bool isSigned, const SDLoc &dl, bool doesNotReturn=false, bool isReturnValueUsed=true, bool isPostTypeLegalization=false) const
Returns a pair of (return value, chain).
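A hedged sketch of makeLibCall from inside a TargetLowering member, paired with the RTLIB selectors listed further below (the f64-to-f32 rounding libcall is chosen purely for illustration):
RTLIB::Libcall LC = RTLIB::getFPROUND(MVT::f64, MVT::f32);
SDValue Ops[] = { Op.getOperand(0) };
std::pair<SDValue, SDValue> Res =
    makeLibCall(DAG, LC, MVT::f32, Ops, /*isSigned=*/false, dl);
return Res.first; // .first = return value, .second = output chain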
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInfo-derived member variable.
TargetOptions Options
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code quality in favor of reducing compile time.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
ObjectFormatType getObjectFormat() const
getFormat - Get the object format for this triple.
Definition: Triple.h:326
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:575
bool isOSVersionLT(unsigned Major, unsigned Minor=0, unsigned Micro=0) const
isOSVersionLT - Helper function for doing comparisons against version numbers included in the target triple.
Definition: Triple.h:414
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:548
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:80
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:229
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:220
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:223
Type * getArrayElementType() const
Definition: Type.h:364
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:146
void dump() const
Type * getVectorElementType() const
Definition: Type.h:371
unsigned getVectorNumElements() const
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:143
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:149
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:161
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:196
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:184
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:72
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
iterator_range< user_iterator > users()
Definition: Value.h:399
Class to represent vector types.
Definition: DerivedTypes.h:427
self_iterator getIterator()
Definition: ilist_node.h:81
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:48
@ SECREL
Section Relative (Windows TLS).
@ SBREL
Static Base Relative (RWPI).
@ GOTTPOFF
Global Offset Table, Thread Pointer Offset.
@ TPOFF
Thread Pointer Offset.
TOF
Target Operand Flag enum.
Definition: ARMBaseInfo.h:238
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which, if indirect, will get special Darwin mangling as a non-lazy-ptr indirect symbol (i.e. "FOO$non_lazy_ptr").
Definition: ARMBaseInfo.h:284
@ MO_SBREL
MO_SBREL - On a symbol operand, this represents a static base relative relocation.
Definition: ARMBaseInfo.h:266
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import stub.
Definition: ARMBaseInfo.h:271
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
Definition: ARMBaseInfo.h:262
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the ".refptr.FOO" symbol.
Definition: ARMBaseInfo.h:259
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into a shifter_operand immediate operand, return the 12-bit encoding for it. If not, return -1.
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
unsigned getAM2Offset(unsigned AM2Opc)
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting an 8-bit immediate any number of bits.
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_operand immediate operand, return the 12-bit encoding for it. If not, return -1.
int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
unsigned createNEONModImm(unsigned OpCmode, unsigned Val)
uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits)
decodeNEONModImm - Decode a NEON modified immediate value into the element value and the element size in bits.
int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
AddrOpc getAM2Op(unsigned AM2Opc)
bool isBitFieldInvertedMask(unsigned v)
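A sketch of how the immediate-encoding tests above are combined when deciding whether a constant is cheap to materialize (helper name hypothetical):
static bool isCheapARMImmediate(unsigned Imm) {
  return ARM_AM::getSOImmVal(Imm) != -1 || // mov-encodable
         ARM_AM::getSOImmVal(~Imm) != -1;  // mvn-encodable
}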
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ ARM_APCS
ARM_APCS - ARM Procedure Calling Standard calling convention (obsolete, but still used on some targets).
Definition: CallingConv.h:95
@ ARM_AAPCS
ARM_AAPCS - ARM Architecture Procedure Calling Standard calling convention (aka EABI).
Definition: CallingConv.h:99
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:42
@ ARM_AAPCS_VFP
ARM_AAPCS_VFP - Same as ARM_AAPCS, but uses hard floating point ABI.
Definition: CallingConv.h:102
@ C
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:467
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual results.
Definition: ISDOpcodes.h:197
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
Definition: ISDOpcodes.h:730
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:726
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:440
@ FLT_ROUNDS_
FLT_ROUNDS_ - Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest, 2 Round to +inf, 3 Round to -inf.
Definition: ISDOpcodes.h:570
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:113
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 value.
Definition: ISDOpcodes.h:340
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2*N], and return the full value as two results, each of type iN.
Definition: ISDOpcodes.h:205
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:848
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:437
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:759
@ ConstantFP
Definition: ISDOpcodes.h:60
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:850
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:820
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:851
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store instruction.
Definition: ISDOpcodes.h:642
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:495
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:326
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with side effects that does not return a value.
Definition: ISDOpcodes.h:165
@ RETURNADDR
Definition: ISDOpcodes.h:72
@ EH_SJLJ_SETUP_DISPATCH
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here.
Definition: ISDOpcodes.h:117
@ GlobalAddress
Definition: ISDOpcodes.h:61
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
Definition: ISDOpcodes.h:502
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length and element type, this produces a concatenated vector result value, with length equal to the sum of the lengths of the inputs.
Definition: ISDOpcodes.h:369
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:287
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:417
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:812
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:209
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-precision floats.
Definition: ISDOpcodes.h:605
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:846
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:595
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:190
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:847
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:913
@ GlobalTLSAddress
Definition: ISDOpcodes.h:61
@ FrameIndex
Definition: ISDOpcodes.h:61
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:489
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the specified vector type.
Definition: ISDOpcodes.h:399
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:440
@ WRITE_REGISTER
Definition: ISDOpcodes.h:85
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:807
@ ADDCARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:241
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:636
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating if there is an incoming carry.
Definition: ISDOpcodes.h:475
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:610
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:678
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:253
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:849
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:666
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:444
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:816
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:177
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant, which is required to be operand #1) half of the integer or float value specified as operand #0.
Definition: ISDOpcodes.h:183
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the source.
Definition: ISDOpcodes.h:755
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defined outside of the scope of this SelectionDAG.
Definition: ISDOpcodes.h:174
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:250
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of type iN, and return the top part.
Definition: ISDOpcodes.h:404
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:434
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:391
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:844
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (a vector value) starting with the element at index IDX.
Definition: ISDOpcodes.h:382
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the named register global variables extension.
Definition: ISDOpcodes.h:84
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially variable) element number IDX.
Definition: ISDOpcodes.h:363
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:169
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:492
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:801
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:459
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:827
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:852
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:622
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:256
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:651
@ ConstantPool
Definition: ISDOpcodes.h:62
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in a large integer register.
Definition: ISDOpcodes.h:510
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:408
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:580
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector result.
Definition: ISDOpcodes.h:453
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:72
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:842
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0.0.
Definition: ISDOpcodes.h:633
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:843
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:548
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:778
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:411
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:798
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with no side effects.
Definition: ISDOpcodes.h:150
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:356
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:49
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:841
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the result VT.
Definition: ISDOpcodes.h:562
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.setjmp intrinsic.
Definition: ISDOpcodes.h:107
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:498
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:750
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:672
@ BlockAddress
Definition: ISDOpcodes.h:62
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:480
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero or sign extended from a narrower type.
Definition: ISDOpcodes.h:56
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:336
@ AssertZext
Definition: ISDOpcodes.h:56
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target intrinsic function with side effects that returns a result.
Definition: ISDOpcodes.h:158
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable, elements.
Definition: ISDOpcodes.h:351
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:950
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:995
static const int LAST_INDEXED_MODE
Definition: ISDOpcodes.h:958
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
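These predicates commonly guard addressing-mode combines; a minimal sketch (N assumed to be an SDNode* from a combine):
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
  if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
    // safe to consider pre/post-indexed forms here
  }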
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1043
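A sketch of the usual call site, assuming M, Builder, and a pointer value Addr from surrounding context (arm_ldrex is just an example intrinsic ID):
Function *Ldrex =
    Intrinsic::getDeclaration(M, Intrinsic::arm_ldrex, Addr->getType());
Value *Loaded = Builder.CreateCall(Ldrex, {Addr});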
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:117
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:70
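A sketch of the PatternMatch combinators above, matching a widening multiply (I is an assumed Value*):
Value *A, *B;
if (match(I, m_Mul(m_ZExtOrSExt(m_Value(A)), m_ZExtOrSExt(m_Value(B))))) {
  // A and B are the narrow inputs feeding the widened multiply
}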
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:51
@ GeneralDynamic
Definition: CodeGen.h:43
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:345
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:342
bool CC_ARM_APCS_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1192
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1966
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:392
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit with the remainder zero (32 bit version).
Definition: MathExtras.h:404
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
bool FastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAcquireOrStronger(AtomicOrdering ao)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:433
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1213
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
bool CC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit version).
Definition: MathExtras.h:416
bool CC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
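These CCAssignFn implementations are driven through CCState (declared in llvm/CodeGen/CallingConvLower.h); a hedged sketch of the consumer side, with CallConv, isVarArg, MF, DAG, and Outs assumed from a call-lowering context:
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_ARM_AAPCS);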
void SplitString(StringRef Source, SmallVectorImpl< StringRef > &OutFragments, StringRef Delimiters=" \t\n\v\f\r")
SplitString - Split up the specified string according to the specified delimiters, appending the result fragments to the output list.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
bool RetCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the most significant bit to the least, stopping at the first 1.
Definition: MathExtras.h:188
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:1050
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the least significant bit to the most, stopping at the first 1.
Definition: MathExtras.h:119
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:450
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
Constant * createSequentialMask(IRBuilder<> &Builder, unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
unsigned countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:477
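A worked example of the mask predicates above:
unsigned Mask = 0x00FFFF00;                      // contiguous ones
bool OK = isShiftedMask_32(Mask);                // true
unsigned Lsb = countTrailingZeros(Mask);         // 8
unsigned Width = countTrailingOnes(Mask >> Lsb); // 16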
@ MVEVMVNModImm
@ Mod
The access may modify the value stored in memory.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
CombineLevel
Definition: DAGCombine.h:15
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:397
bool RetFastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:614
auto count_if(R &&Range, UnaryPredicate P) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
Definition: STLExtras.h:1266
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
AtomicOrdering
Atomic ordering for LLVM's memory model.
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
Value * concatenateVectors(IRBuilder<> &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
bool isReleaseOrStronger(AtomicOrdering ao)
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
bool isStrongerThanMonotonic(AtomicOrdering ao)
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
#define N
Extended Value Type.
Definition: ValueTypes.h:33
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition: ValueTypes.h:95
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:125
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:72
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:291
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:228
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition: ValueTypes.h:240
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:135
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:303
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:340
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:252
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:181
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:57
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:150
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:259
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:217
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:264
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:297
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:145
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:272
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:140
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:176
CallLoweringInfo & setCallee(Type *ResultTy, FunctionType *FuncTy, const Value *Target, ArgListTy &&ArgsList, ImmutableCallSite &Call)
Definition: FastISel.h:104
unsigned getByValSize() const
unsigned getByValAlign() const
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:62
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:39
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:65
KnownBits sext(unsigned BitWidth) const
Sign extends the underlying known Zero and One bits.
Definition: KnownBits.h:130
KnownBits zext(unsigned BitWidth, bool ExtendedBitsAreKnownZero) const
Extends the underlying known Zero and One bits.
Definition: KnownBits.h:120
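A sketch combining computeKnownBits (listed above under SelectionDAG) with the KnownBits result, proving the top 16 bits of Op are zero:
KnownBits Known = DAG.computeKnownBits(Op);
APInt Top16 = APInt::getHighBitsSet(Known.getBitWidth(), 16);
bool Top16Zero = (Known.Zero & Top16) == Top16;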
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
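A sketch of getFixedStack feeding a stack store, with FI, Arg, Chain, and dl assumed from surrounding argument-lowering code:
MachineFunction &MF = DAG.getMachineFunction();
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
Chain = DAG.getStore(Chain, dl, Arg, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));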
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg. If BaseGV is null, there is no BaseGV.
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setTailCall(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetLowering to the caller.
bool CombineTo(SDValue O, SDValue N)
bool isReadOnly() const